WEKO3
アイテム
{"_buckets": {"deposit": "c8cd44e5-2ab7-49c4-9ffb-0a4a5a9d78d9"}, "_deposit": {"id": "419", "owners": [], "pid": {"revision_id": 0, "type": "depid", "value": "419"}, "status": "published"}, "_oai": {"id": "oai:repository.dl.itc.u-tokyo.ac.jp:00000419", "sets": ["13", "15"]}, "item_2_alternative_title_1": {"attribute_name": "その他のタイトル", "attribute_value_mlt": [{"subitem_alternative_title": "Extraction of Paraphrasing Pattern by Aligned Corpora of Web and Mobile Terminal News Articles"}]}, "item_2_biblio_info_7": {"attribute_name": "書誌情報", "attribute_value_mlt": [{"bibliographicIssueDates": {"bibliographicIssueDate": "2005-10", "bibliographicIssueDateType": "Issued"}, "bibliographicIssueNumber": "5", "bibliographicPageEnd": "184", "bibliographicPageStart": "157", "bibliographicVolumeNumber": "12", "bibliographic_titles": [{"bibliographic_title": "自然言語処理"}]}]}, "item_2_description_13": {"attribute_name": "フォーマット", "attribute_value_mlt": [{"subitem_description": "application/pdf", "subitem_description_type": "Other"}]}, "item_2_description_5": {"attribute_name": "抄録", "attribute_value_mlt": [{"subitem_description": "本研究では,数十文字程度の長さで携帯端末向けに配信されている新聞記事と数百文字程度の長さのWeb 新聞記事の両者を約3 年に渡って収集した.こうして収集したコーパスから文末表現の縮約などの言い換え表現の抽出を機械的に行った.まず,Web から収集した携帯向け新聞記事とWeb 新聞記事からなるコーパスに対して記事単位の対応付けを行い,次に文単位の対応付けを行った.次に携帯向け記事文の文末の表現を形態素解析を用いて抽出し,その文に対応するWeb 新聞記事の文を集める.そしてWeb 新聞記事の文の文末から形態素ごとに言い換え先表現を抽出し,それに対して頻度等を用いた得点付け,および必要な名詞を欠落させてしまう不適切な言い換えの除去を行うことにより言い換え表現の抽出精度向上を図った.", "subitem_description_type": "Abstract"}, {"subitem_description": "We have collected both Web news-paper articles of several hundreds of characters, for three years and their counter parts distributed for mobile terminals, which consist of fifty to a hundred characters. Then, we extracted a number of candidates of paraphrases of the final part of sentences from them automatically. At first we have aligned these two types of corpus first at article level, then at sentence level. 
Next, we extract the final part of mobile article sentences using morphological analyzer, and collect their counterpart expressions of Web article sentences. Finally, we extracted the candidates of morpheme sequence from the final part of Web article sentence, then we propose the combination of two methods for them in order to improve the extraction accuracy of the sets: 1) ranking based on frequency, branching factor and length of string, and 2) filtering to remove inappropriate expressions which eliminate semantically indispensable nouns.", "subitem_description_type": "Abstract"}]}, "item_2_full_name_3": {"attribute_name": "著者別名", "attribute_value_mlt": [{"nameIdentifiers": [{"nameIdentifier": "106368", "nameIdentifierScheme": "WEKO"}], "names": [{"name": "Iwakoshi, Moritaka"}]}, {"nameIdentifiers": [{"nameIdentifier": "106369", "nameIdentifierScheme": "WEKO"}], "names": [{"name": "Masuda, Hidetaka"}]}, {"nameIdentifiers": [{"nameIdentifier": "106370", "nameIdentifierScheme": "WEKO"}], "names": [{"name": "Nakagawa, Hiroshi"}]}]}, "item_2_publisher_20": {"attribute_name": "出版者", "attribute_value_mlt": [{"subitem_publisher": "言語処理学会"}]}, "item_2_source_id_10": {"attribute_name": "書誌レコードID", "attribute_value_mlt": [{"subitem_source_identifier": "AN10472659", "subitem_source_identifier_type": "NCID"}]}, "item_2_source_id_8": {"attribute_name": "ISSN", "attribute_value_mlt": [{"subitem_source_identifier": "13407619", "subitem_source_identifier_type": "ISSN"}]}, "item_2_subject_15": {"attribute_name": "日本十進分類法", "attribute_value_mlt": [{"subitem_subject": "007", "subitem_subject_scheme": "NDC"}]}, "item_2_text_21": {"attribute_name": "出版者別名", "attribute_value_mlt": [{"subitem_text_value": "The Association for Natural Language Processing"}]}, "item_2_text_34": {"attribute_name": "資源タイプ", "attribute_value_mlt": [{"subitem_text_value": "Journal Article"}]}, "item_2_text_4": {"attribute_name": "著者所属", "attribute_value_mlt": [{"subitem_text_value": "東京電機大学工学部"}, 
{"subitem_text_value": "東京大学情報基盤センター"}, {"subitem_text_value": "School of Engineering, Tokyo Denki University"}, {"subitem_text_value": "Information Technology Center,The University of Tokyo"}]}, "item_creator": {"attribute_name": "著者", "attribute_type": "creator", "attribute_value_mlt": [{"creatorNames": [{"creatorName": "岩越, 守孝"}], "nameIdentifiers": [{"nameIdentifier": "106365", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "増田, 英孝"}], "nameIdentifiers": [{"nameIdentifier": "106366", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "中川, 裕志"}], "nameIdentifiers": [{"nameIdentifier": "106367", "nameIdentifierScheme": "WEKO"}]}]}, "item_files": {"attribute_name": "ファイル情報", "attribute_type": "file", "attribute_value_mlt": [{"accessrole": "open_date", "date": [{"dateType": "Available", "dateValue": "2017-06-26"}], "displaytype": "detail", "download_preview_message": "", "file_order": 0, "filename": "v12n5_07.pdf", "filesize": [{"value": "302.9 kB"}], "format": "application/pdf", "future_date_message": "", "is_thumbnail": false, "licensetype": "license_free", "mimetype": "application/pdf", "size": 302900.0, "url": {"label": "v12n5_07.pdf", "url": "https://repository.dl.itc.u-tokyo.ac.jp/record/419/files/v12n5_07.pdf"}, "version_id": "415669e9-34e6-4fa0-9538-51b0fac6efec"}]}, "item_keyword": {"attribute_name": "キーワード", "attribute_value_mlt": [{"subitem_subject": "言い換え", "subitem_subject_scheme": "Other"}, {"subitem_subject": "携帯端末", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Web", "subitem_subject_scheme": "Other"}, {"subitem_subject": "文末表現", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Paraphrase", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Mobile terminal", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Web", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Sentence final part", "subitem_subject_scheme": "Other"}]}, "item_language": {"attribute_name": "言語", 
"attribute_value_mlt": [{"subitem_language": "jpn"}]}, "item_resource_type": {"attribute_name": "資源タイプ", "attribute_value_mlt": [{"resourcetype": "journal article", "resourceuri": "http://purl.org/coar/resource_type/c_6501"}]}, "item_title": "Webと携帯端末向けの新聞記事の対応コーパスからの文末言い換え抽出", "item_titles": {"attribute_name": "タイトル", "attribute_value_mlt": [{"subitem_title": "Webと携帯端末向けの新聞記事の対応コーパスからの文末言い換え抽出"}]}, "item_type_id": "2", "owner": "1", "path": ["13", "15"], "permalink_uri": "http://hdl.handle.net/2261/29447", "pubdate": {"attribute_name": "公開日", "attribute_value": "2009-12-15"}, "publish_date": "2009-12-15", "publish_status": "0", "recid": "419", "relation": {}, "relation_version_is_last": true, "title": ["Webと携帯端末向けの新聞記事の対応コーパスからの文末言い換え抽出"], "weko_shared_id": null}
Webと携帯端末向けの新聞記事の対応コーパスからの文末言い換え抽出
http://hdl.handle.net/2261/29447
http://hdl.handle.net/2261/29447 fa03733c-4827-401e-8a5f-558b736cdc97
名前 / ファイル | ライセンス | アクション |
---|---|---|
v12n5_07.pdf (302.9 kB)
|
|
Item type | 学術雑誌論文 / Journal Article(1) | |||||
---|---|---|---|---|---|---|
公開日 | 2009-12-15 | |||||
タイトル | ||||||
タイトル | Webと携帯端末向けの新聞記事の対応コーパスからの文末言い換え抽出 | |||||
言語 | ||||||
言語 | jpn | |||||
キーワード | ||||||
主題 | 言い換え | |||||
主題Scheme | Other | |||||
キーワード | ||||||
主題 | 携帯端末 | |||||
主題Scheme | Other | |||||
キーワード | ||||||
主題 | Web | |||||
主題Scheme | Other | |||||
キーワード | ||||||
主題 | 文末表現 | |||||
主題Scheme | Other | |||||
キーワード | ||||||
主題 | Paraphrase | |||||
主題Scheme | Other | |||||
キーワード | ||||||
主題 | Mobile terminal | |||||
主題Scheme | Other | |||||
キーワード | ||||||
主題 | Web | |||||
主題Scheme | Other | |||||
キーワード | ||||||
主題 | Sentence final part | |||||
主題Scheme | Other | |||||
資源タイプ | ||||||
資源 | http://purl.org/coar/resource_type/c_6501 | |||||
タイプ | journal article | |||||
その他のタイトル | ||||||
その他のタイトル | Extraction of Paraphrasing Pattern by Aligned Corpora of Web and Mobile Terminal News Articles | |||||
著者 | 岩越, 守孝; 増田, 英孝; 中川, 裕志 | |||||
著者別名 | ||||||
識別子 | 106368 | |||||
識別子Scheme | WEKO | |||||
姓名 | Iwakoshi, Moritaka | |||||
著者別名 | ||||||
識別子 | 106369 | |||||
識別子Scheme | WEKO | |||||
姓名 | Masuda, Hidetaka | |||||
著者別名 | ||||||
識別子 | 106370 | |||||
識別子Scheme | WEKO | |||||
姓名 | Nakagawa, Hiroshi | |||||
著者所属 | ||||||
著者所属 | 東京電機大学工学部 | |||||
著者所属 | ||||||
著者所属 | 東京大学情報基盤センター | |||||
著者所属 | ||||||
著者所属 | School of Engineering, Tokyo Denki University | |||||
著者所属 | ||||||
著者所属 | Information Technology Center,The University of Tokyo | |||||
抄録 | ||||||
内容記述タイプ | Abstract | |||||
内容記述 | 本研究では,数十文字程度の長さで携帯端末向けに配信されている新聞記事と数百文字程度の長さのWeb 新聞記事の両者を約3 年に渡って収集した.こうして収集したコーパスから文末表現の縮約などの言い換え表現の抽出を機械的に行った.まず,Web から収集した携帯向け新聞記事とWeb 新聞記事からなるコーパスに対して記事単位の対応付けを行い,次に文単位の対応付けを行った.次に携帯向け記事文の文末の表現を形態素解析を用いて抽出し,その文に対応するWeb 新聞記事の文を集める.そしてWeb 新聞記事の文の文末から形態素ごとに言い換え先表現を抽出し,それに対して頻度等を用いた得点付け,および必要な名詞を欠落させてしまう不適切な言い換えの除去を行うことにより言い換え表現の抽出精度向上を図った. | |||||
抄録 | ||||||
内容記述タイプ | Abstract | |||||
内容記述 | We have collected both Web news-paper articles of several hundreds of characters, for three years and their counter parts distributed for mobile terminals, which consist of fifty to a hundred characters. Then, we extracted a number of candidates of paraphrases of the final part of sentences from them automatically. At first we have aligned these two types of corpus first at article level, then at sentence level. Next, we extract the final part of mobile article sentences using morphological analyzer, and collect their counterpart expressions of Web article sentences. Finally, we extracted the candidates of morpheme sequence from the final part of Web article sentence, then we propose the combination of two methods for them in order to improve the extraction accuracy of the sets: 1) ranking based on frequency, branching factor and length of string, and 2) filtering to remove inappropriate expressions which eliminate semantically indispensable nouns. | |||||
書誌情報 | 自然言語処理 巻 12, 号 5, p. 157-184, 発行日 2005-10 | |||||
ISSN | ||||||
収録物識別子タイプ | ISSN | |||||
収録物識別子 | 13407619 | |||||
書誌レコードID | ||||||
収録物識別子タイプ | NCID | |||||
収録物識別子 | AN10472659 | |||||
フォーマット | ||||||
内容記述タイプ | Other | |||||
内容記述 | application/pdf | |||||
日本十進分類法 | ||||||
主題 | 007 | |||||
主題Scheme | NDC | |||||
出版者 | ||||||
出版者 | 言語処理学会 | |||||
出版者別名 | ||||||
The Association for Natural Language Processing |