happy8825/MMLongBench_var4_deepeyes_multiturn_deterministic
收藏 Hugging Face 2025-12-14 更新 2025-12-20 收录
下载链接:
https://hf-mirror.com/datasets/happy8825/MMLongBench_var4_deepeyes_multiturn_deterministic
下载链接
链接失效反馈官方服务:
资源简介:
<!-- SIMPLEDOC_AUTO_SUMMARIES_START -->
### MMLongBench – 2025-12-14 13:43 UTC
```
Average accuracy: 46.74% (1072 samples with scores)
Subset metrics by evidence source:
Pure-text (Plain-text): samples=302, accuracy=48.01%
Figure: samples=299, accuracy=38.46%
Table: samples=217, accuracy=42.40%
Chart: samples=175, accuracy=41.71%
Generalized-text (Layout): samples=119, accuracy=34.45%
Subset metrics by evidence pages length:
no_pages: samples=226, accuracy=57.08%
single_page: samples=489, accuracy=52.76%
multiple_pages: samples=357, accuracy=31.93%
Done: Results saved to /hub_data2/seohyun/outputs/var4_deepeyes_multiturn_deterministic/simpledoc_eval/MMLongBench/eval_results.jsonl
Results source: /hub_data2/seohyun/outputs/var4_deepeyes_multiturn_deterministic/results.json
```
---
### MMLongBench – 2025-12-14 10:56 UTC
```
Average accuracy: 50.00% (10 samples with scores)
Subset metrics by evidence source:
Chart: samples=5, accuracy=40.00%
Pure-text (Plain-text): samples=2, accuracy=0.00%
Generalized-text (Layout): samples=1, accuracy=0.00%
Table: samples=1, accuracy=100.00%
Subset metrics by evidence pages length:
no_pages: samples=2, accuracy=100.00%
single_page: samples=4, accuracy=50.00%
multiple_pages: samples=4, accuracy=25.00%
Done: Results saved to /hub_data2/seohyun/outputs/var4_deepeyes_multiturn_deterministic/simpledoc_eval/MMLongBench/eval_results.jsonl
Results source: /hub_data2/seohyun/outputs/var4_deepeyes_multiturn_deterministic/results.json
```
<!-- SIMPLEDOC_AUTO_SUMMARIES_END -->
---
configs:
- config_name: default
  data_files:
  - split: train
    path: data/train-*
dataset_info:
  features:
  - name: relevant_pages
    list: int64
  - name: evidence_pages
    list: int64
  - name: score
    dtype: int64
  - name: doc_id
    dtype: string
  - name: doc_type
    dtype: string
  - name: question
    dtype: string
  - name: answer
    dtype: string
  - name: evidence_sources
    list: string
  - name: final_answer
    dtype: string
  - name: turn1_colqwen_query
    dtype: 'null'
  - name: turn1_colqwen_retrieval_results
    dtype: 'null'
  - name: turn1_llm_query_input
    dtype: 'null'
  - name: turn1_llm_retrieval_results
    struct:
    - name: document_summary
      dtype: string
    - name: relevant_pages
      list: int64
  - name: turn1_llm_raw_output
    dtype: string
  - name: turn1_memory_out
    dtype: string
  - name: turn2_memory_in
    dtype: string
  - name: turn2_vlm_prompt_input
    dtype: string
  - name: turn2_vlm_raw_output
    dtype: string
  - name: turn2_final_answer
    dtype: string
  - name: turn2_response_type
    dtype: string
  - name: turn2_updated_question
    dtype: 'null'
  - name: turn2_notes
    dtype: 'null'
  - name: turn2_vlm_turn1_input_image0_ref
    dtype: string
  - name: turn2_vlm_turn1_input_messages
    list:
    - name: content
      list:
      - name: ref
        dtype: string
      - name: text
        dtype: string
      - name: type
        dtype: string
    - name: role
      dtype: string
  - name: turn2_vlm_turn1_prompt
    dtype: string
  - name: turn2_vlm_turn1_raw_output
    dtype: string
  - name: turn2_vlm_turn1_zoom_box
    list: float64
  - name: turn2_vlm_turn1_zoom_page_index
    dtype: int64
  - name: turn2_vlm_turn2_input_image0_ref
    dtype: string
  - name: turn2_vlm_turn2_input_image1_ref
    dtype: string
  - name: turn2_vlm_turn2_input_messages
    list:
    - name: content
      list:
      - name: ref
        dtype: string
      - name: text
        dtype: string
      - name: type
        dtype: string
    - name: role
      dtype: string
  - name: turn2_vlm_turn2_prompt
    dtype: string
  - name: turn2_vlm_turn2_raw_output
    dtype: string
  - name: turn2_vlm_turn2_zoom_box
    list: float64
  - name: turn2_vlm_turn2_zoom_page_index
    dtype: int64
  - name: turn2_vlm_turn3_input_image0_ref
    dtype: string
  - name: turn2_vlm_turn3_input_image1_ref
    dtype: string
  - name: turn2_vlm_turn3_input_image2_ref
    dtype: string
  - name: turn2_vlm_turn3_input_messages
    list:
    - name: content
      list:
      - name: ref
        dtype: string
      - name: text
        dtype: string
      - name: type
        dtype: string
    - name: role
      dtype: string
  - name: turn2_vlm_turn3_prompt
    dtype: string
  - name: turn2_vlm_turn3_raw_output
    dtype: string
  - name: turn2_vlm_turn3_zoom_box
    list: float64
  - name: turn2_vlm_turn3_zoom_page_index
    dtype: int64
  - name: turn2_vlm_turn4_input_image0_ref
    dtype: string
  - name: turn2_vlm_turn4_input_image1_ref
    dtype: string
  - name: turn2_vlm_turn4_input_image2_ref
    dtype: string
  - name: turn2_vlm_turn4_input_image3_ref
    dtype: string
  - name: turn2_vlm_turn4_input_messages
    list:
    - name: content
      list:
      - name: ref
        dtype: string
      - name: text
        dtype: string
      - name: type
        dtype: string
    - name: role
      dtype: string
  - name: turn2_vlm_turn4_prompt
    dtype: string
  - name: turn2_vlm_turn4_raw_output
    dtype: string
  - name: turn2_vlm_turn4_zoom_box
    list: float64
  - name: turn2_vlm_turn4_zoom_page_index
    dtype: int64
  - name: turn2_vlm_turn5_input_image0_ref
    dtype: string
  - name: turn2_vlm_turn5_input_image1_ref
    dtype: string
  - name: turn2_vlm_turn5_input_image2_ref
    dtype: string
  - name: turn2_vlm_turn5_input_image3_ref
    dtype: string
  - name: turn2_vlm_turn5_input_image4_ref
    dtype: string
  - name: turn2_vlm_turn5_input_messages
    list:
    - name: content
      list:
      - name: ref
        dtype: string
      - name: text
        dtype: string
      - name: type
        dtype: string
    - name: role
      dtype: string
  - name: turn2_vlm_turn5_prompt
    dtype: string
  - name: turn2_vlm_turn5_raw_output
    dtype: string
  - name: turn2_vlm_turn5_zoom_box
    list: float64
  - name: turn2_vlm_turn5_zoom_page_index
    dtype: int64
  - name: turn2_vlm_turn6_input_image0_ref
    dtype: string
  - name: turn2_vlm_turn6_input_image1_ref
    dtype: string
  - name: turn2_vlm_turn6_input_image2_ref
    dtype: string
  - name: turn2_vlm_turn6_input_image3_ref
    dtype: string
  - name: turn2_vlm_turn6_input_image4_ref
    dtype: string
  - name: turn2_vlm_turn6_input_image5_ref
    dtype: string
  - name: turn2_vlm_turn6_input_messages
    list:
    - name: content
      list:
      - name: ref
        dtype: string
      - name: text
        dtype: string
      - name: type
        dtype: string
    - name: role
      dtype: string
  - name: turn2_vlm_turn6_prompt
    dtype: string
  - name: turn2_vlm_turn6_raw_output
    dtype: string
  - name: turn2_vlm_turn6_zoom_box
    list: float64
  - name: turn2_vlm_turn6_zoom_page_index
    dtype: int64
  - name: turn2_vlm_turn7_input_image0_ref
    dtype: string
  - name: turn2_vlm_turn7_input_image1_ref
    dtype: string
  - name: turn2_vlm_turn7_input_image2_ref
    dtype: string
  - name: turn2_vlm_turn7_input_image3_ref
    dtype: string
  - name: turn2_vlm_turn7_input_image4_ref
    dtype: string
  - name: turn2_vlm_turn7_input_image5_ref
    dtype: string
  - name: turn2_vlm_turn7_input_image6_ref
    dtype: string
  - name: turn2_vlm_turn7_input_messages
    list:
    - name: content
      list:
      - name: ref
        dtype: string
      - name: text
        dtype: string
      - name: type
        dtype: string
    - name: role
      dtype: string
  - name: turn2_vlm_turn7_prompt
    dtype: string
  - name: turn2_vlm_turn7_raw_output
    dtype: string
  splits:
  - name: train
    num_bytes: 22703088
    num_examples: 1073
  download_size: 4689003
  dataset_size: 22703088
---
---
dataset_info:
  features:
  - name: relevant_pages
    list: int64
  - name: evidence_pages
    list: int64
  - name: score
    dtype: int64
  - name: doc_id
    dtype: string
  - name: doc_type
    dtype: string
  - name: question
    dtype: string
  - name: answer
    dtype: string
  - name: evidence_sources
    list: string
  - name: final_answer
    dtype: string
  - name: turn1_colqwen_query
    dtype: 'null'
  - name: turn1_colqwen_retrieval_results
    dtype: 'null'
  - name: turn1_llm_query_input
    dtype: 'null'
  - name: turn1_llm_retrieval_results
    struct:
    - name: document_summary
      dtype: string
    - name: relevant_pages
      list: int64
  - name: turn1_llm_raw_output
    dtype: string
  - name: turn1_memory_out
    dtype: string
  - name: turn2_memory_in
    dtype: string
  - name: turn2_vlm_prompt_input
    dtype: string
  - name: turn2_vlm_raw_output
    dtype: string
  - name: turn2_final_answer
    dtype: string
  - name: turn2_response_type
    dtype: string
  - name: turn2_updated_question
    dtype: 'null'
  - name: turn2_notes
    dtype: 'null'
  - name: turn2_vlm_turn1_input_image0_ref
    dtype: string
  - name: turn2_vlm_turn1_input_messages
    list:
    - name: content
      list:
      - name: ref
        dtype: string
      - name: text
        dtype: string
      - name: type
        dtype: string
    - name: role
      dtype: string
  - name: turn2_vlm_turn1_prompt
    dtype: string
  - name: turn2_vlm_turn1_raw_output
    dtype: string
  - name: turn2_vlm_turn1_zoom_box
    list: float64
  - name: turn2_vlm_turn1_zoom_page_index
    dtype: int64
  - name: turn2_vlm_turn2_input_image0_ref
    dtype: string
  - name: turn2_vlm_turn2_input_image1_ref
    dtype: string
  - name: turn2_vlm_turn2_input_messages
    list:
    - name: content
      list:
      - name: ref
        dtype: string
      - name: text
        dtype: string
      - name: type
        dtype: string
    - name: role
      dtype: string
  - name: turn2_vlm_turn2_prompt
    dtype: string
  - name: turn2_vlm_turn2_raw_output
    dtype: string
  - name: turn2_vlm_turn2_zoom_box
    list: float64
  - name: turn2_vlm_turn2_zoom_page_index
    dtype: int64
  splits:
  - name: train
    num_bytes: 216334
    num_examples: 10
  download_size: 108839
  dataset_size: 216334
configs:
- config_name: default
  data_files:
  - split: train
    path: data/train-*
---
提供机构:
happy8825



