mirror of
https://github.com/RYDE-WORK/Advanced-RAG.git
synced 2026-01-19 13:56:01 +08:00
2187 lines
78 KiB
Plaintext
2187 lines
78 KiB
Plaintext
{
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0,
|
||
"metadata": {
|
||
"colab": {
|
||
"provenance": [],
|
||
"gpuType": "T4",
|
||
"authorship_tag": "ABX9TyOsgwsp69IaZ0UMnvAVkdVX",
|
||
"include_colab_link": true
|
||
},
|
||
"kernelspec": {
|
||
"name": "python3",
|
||
"display_name": "Python 3"
|
||
},
|
||
"language_info": {
|
||
"name": "python"
|
||
},
|
||
"widgets": {
|
||
"application/vnd.jupyter.widget-state+json": {
|
||
"ac0fab4eb8e543c1a0f5b037e0235815": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HBoxModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HBoxModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HBoxView",
|
||
"box_style": "",
|
||
"children": [
|
||
"IPY_MODEL_3649dec7e417482091fc2f86939e0c16",
|
||
"IPY_MODEL_aa165def4267499fa5bd66c7a23aa83d",
|
||
"IPY_MODEL_4ee55dbff1674ba9bdb0614692dc64e9"
|
||
],
|
||
"layout": "IPY_MODEL_54445f16b2e5439dbea3d25c7d690933"
|
||
}
|
||
},
|
||
"3649dec7e417482091fc2f86939e0c16": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HTMLModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HTMLModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HTMLView",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_b240ed924ce94639b137552549ffd29e",
|
||
"placeholder": "",
|
||
"style": "IPY_MODEL_c0f954fb5893438fbc5306ba55d44c20",
|
||
"value": "yolox_l0.05.onnx: 100%"
|
||
}
|
||
},
|
||
"aa165def4267499fa5bd66c7a23aa83d": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "FloatProgressModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "FloatProgressModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "ProgressView",
|
||
"bar_style": "success",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_cf8f1809505a436c949d936e74f93094",
|
||
"max": 216625723,
|
||
"min": 0,
|
||
"orientation": "horizontal",
|
||
"style": "IPY_MODEL_7639470b337e47b0b7a60d32c8770744",
|
||
"value": 216625723
|
||
}
|
||
},
|
||
"4ee55dbff1674ba9bdb0614692dc64e9": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HTMLModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HTMLModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HTMLView",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_17913e64f28b4a2daf9f9d21654c6372",
|
||
"placeholder": "",
|
||
"style": "IPY_MODEL_ef27bad8255f447da09541fc10f80ad8",
|
||
"value": " 217M/217M [00:01<00:00, 160MB/s]"
|
||
}
|
||
},
|
||
"54445f16b2e5439dbea3d25c7d690933": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"b240ed924ce94639b137552549ffd29e": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"c0f954fb5893438fbc5306ba55d44c20": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "DescriptionStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "DescriptionStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"cf8f1809505a436c949d936e74f93094": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"7639470b337e47b0b7a60d32c8770744": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "ProgressStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "ProgressStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"bar_color": null,
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"17913e64f28b4a2daf9f9d21654c6372": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"ef27bad8255f447da09541fc10f80ad8": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "DescriptionStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "DescriptionStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"8ee21ab783c543c0944e8732faaebb4a": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HBoxModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HBoxModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HBoxView",
|
||
"box_style": "",
|
||
"children": [
|
||
"IPY_MODEL_2024899dae44406a9e187c5d770f873f",
|
||
"IPY_MODEL_2174f17cbb124ab2bf1a32765c0d5a0c",
|
||
"IPY_MODEL_330603dd11944ad9a267378e3a17a573"
|
||
],
|
||
"layout": "IPY_MODEL_674ae68fb32d40b48152f29f77636138"
|
||
}
|
||
},
|
||
"2024899dae44406a9e187c5d770f873f": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HTMLModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HTMLModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HTMLView",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_c1cd34e355a540f58cebe58990bee350",
|
||
"placeholder": "",
|
||
"style": "IPY_MODEL_2248e9a15d694e7f94b6d817a0bceb90",
|
||
"value": "config.json: 100%"
|
||
}
|
||
},
|
||
"2174f17cbb124ab2bf1a32765c0d5a0c": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "FloatProgressModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "FloatProgressModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "ProgressView",
|
||
"bar_style": "success",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_2477901c30b44069ac5db5cf4e8924f0",
|
||
"max": 1469,
|
||
"min": 0,
|
||
"orientation": "horizontal",
|
||
"style": "IPY_MODEL_f2d4643e36634b668eb2d749d74e7780",
|
||
"value": 1469
|
||
}
|
||
},
|
||
"330603dd11944ad9a267378e3a17a573": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HTMLModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HTMLModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HTMLView",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_f9a39e14cb0140e8ac09cfb400344804",
|
||
"placeholder": "",
|
||
"style": "IPY_MODEL_9e0916f00d374a858a3edb397a63af8d",
|
||
"value": " 1.47k/1.47k [00:00<00:00, 71.8kB/s]"
|
||
}
|
||
},
|
||
"674ae68fb32d40b48152f29f77636138": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"c1cd34e355a540f58cebe58990bee350": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"2248e9a15d694e7f94b6d817a0bceb90": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "DescriptionStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "DescriptionStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"2477901c30b44069ac5db5cf4e8924f0": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"f2d4643e36634b668eb2d749d74e7780": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "ProgressStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "ProgressStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"bar_color": null,
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"f9a39e14cb0140e8ac09cfb400344804": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"9e0916f00d374a858a3edb397a63af8d": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "DescriptionStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "DescriptionStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"5b720610ad684d5abe3ca6f3acdf6606": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HBoxModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HBoxModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HBoxView",
|
||
"box_style": "",
|
||
"children": [
|
||
"IPY_MODEL_127c410899934087904875f527a2dbba",
|
||
"IPY_MODEL_2f9d37688cfa46059f152c0e6b080904",
|
||
"IPY_MODEL_cd3f489a29c8448c9a01f595690c0e48"
|
||
],
|
||
"layout": "IPY_MODEL_f9056446dd594a2aaec5396995a5f065"
|
||
}
|
||
},
|
||
"127c410899934087904875f527a2dbba": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HTMLModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HTMLModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HTMLView",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_42efb45824894d808c9e31ef7554a852",
|
||
"placeholder": "",
|
||
"style": "IPY_MODEL_181f2590b9464b929bf9b4e3b5c88b5c",
|
||
"value": "model.safetensors: 100%"
|
||
}
|
||
},
|
||
"2f9d37688cfa46059f152c0e6b080904": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "FloatProgressModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "FloatProgressModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "ProgressView",
|
||
"bar_style": "success",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_4f40596f290146d8b53dc3dda50a09e7",
|
||
"max": 115434268,
|
||
"min": 0,
|
||
"orientation": "horizontal",
|
||
"style": "IPY_MODEL_9869010a1cb949f7ae1feeef095fc05e",
|
||
"value": 115434268
|
||
}
|
||
},
|
||
"cd3f489a29c8448c9a01f595690c0e48": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HTMLModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HTMLModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HTMLView",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_1cc0b1752a704fa0b8687d0411539055",
|
||
"placeholder": "",
|
||
"style": "IPY_MODEL_089092011cf341379b9be17a052830f1",
|
||
"value": " 115M/115M [00:00<00:00, 187MB/s]"
|
||
}
|
||
},
|
||
"f9056446dd594a2aaec5396995a5f065": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"42efb45824894d808c9e31ef7554a852": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"181f2590b9464b929bf9b4e3b5c88b5c": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "DescriptionStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "DescriptionStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"4f40596f290146d8b53dc3dda50a09e7": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"9869010a1cb949f7ae1feeef095fc05e": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "ProgressStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "ProgressStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"bar_color": null,
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"1cc0b1752a704fa0b8687d0411539055": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"089092011cf341379b9be17a052830f1": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "DescriptionStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "DescriptionStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"f4321c31f68a458382b2b8b969993831": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HBoxModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HBoxModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HBoxView",
|
||
"box_style": "",
|
||
"children": [
|
||
"IPY_MODEL_e8ed92210b654289a7732b2e76f8dcad",
|
||
"IPY_MODEL_7428d3cc85f148fc8659b2f7e5e1afe6",
|
||
"IPY_MODEL_c34b003c7d524da0bb39e8c8cfe63d44"
|
||
],
|
||
"layout": "IPY_MODEL_8fd5fbbf855c43428ff5253adda6a4b3"
|
||
}
|
||
},
|
||
"e8ed92210b654289a7732b2e76f8dcad": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HTMLModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HTMLModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HTMLView",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_f623dbd8c9b5462bab46250eb7d492e6",
|
||
"placeholder": "",
|
||
"style": "IPY_MODEL_bb8ab670115c46378f448a7ca93726e2",
|
||
"value": "model.safetensors: 100%"
|
||
}
|
||
},
|
||
"7428d3cc85f148fc8659b2f7e5e1afe6": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "FloatProgressModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "FloatProgressModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "ProgressView",
|
||
"bar_style": "success",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_dc74c156b5234141934265b417a5ffcb",
|
||
"max": 46807446,
|
||
"min": 0,
|
||
"orientation": "horizontal",
|
||
"style": "IPY_MODEL_d18a0ce1bd7149d388d12cdd86c3390c",
|
||
"value": 46807446
|
||
}
|
||
},
|
||
"c34b003c7d524da0bb39e8c8cfe63d44": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "HTMLModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_dom_classes": [],
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "HTMLModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/controls",
|
||
"_view_module_version": "1.5.0",
|
||
"_view_name": "HTMLView",
|
||
"description": "",
|
||
"description_tooltip": null,
|
||
"layout": "IPY_MODEL_fb00943087d0426fad871d9d6ed68a2a",
|
||
"placeholder": "",
|
||
"style": "IPY_MODEL_4e25db89a38944f1bf63427969d90caa",
|
||
"value": " 46.8M/46.8M [00:00<00:00, 177MB/s]"
|
||
}
|
||
},
|
||
"8fd5fbbf855c43428ff5253adda6a4b3": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"f623dbd8c9b5462bab46250eb7d492e6": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"bb8ab670115c46378f448a7ca93726e2": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "DescriptionStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "DescriptionStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"dc74c156b5234141934265b417a5ffcb": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"d18a0ce1bd7149d388d12cdd86c3390c": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "ProgressStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "ProgressStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"bar_color": null,
|
||
"description_width": ""
|
||
}
|
||
},
|
||
"fb00943087d0426fad871d9d6ed68a2a": {
|
||
"model_module": "@jupyter-widgets/base",
|
||
"model_name": "LayoutModel",
|
||
"model_module_version": "1.2.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/base",
|
||
"_model_module_version": "1.2.0",
|
||
"_model_name": "LayoutModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "LayoutView",
|
||
"align_content": null,
|
||
"align_items": null,
|
||
"align_self": null,
|
||
"border": null,
|
||
"bottom": null,
|
||
"display": null,
|
||
"flex": null,
|
||
"flex_flow": null,
|
||
"grid_area": null,
|
||
"grid_auto_columns": null,
|
||
"grid_auto_flow": null,
|
||
"grid_auto_rows": null,
|
||
"grid_column": null,
|
||
"grid_gap": null,
|
||
"grid_row": null,
|
||
"grid_template_areas": null,
|
||
"grid_template_columns": null,
|
||
"grid_template_rows": null,
|
||
"height": null,
|
||
"justify_content": null,
|
||
"justify_items": null,
|
||
"left": null,
|
||
"margin": null,
|
||
"max_height": null,
|
||
"max_width": null,
|
||
"min_height": null,
|
||
"min_width": null,
|
||
"object_fit": null,
|
||
"object_position": null,
|
||
"order": null,
|
||
"overflow": null,
|
||
"overflow_x": null,
|
||
"overflow_y": null,
|
||
"padding": null,
|
||
"right": null,
|
||
"top": null,
|
||
"visibility": null,
|
||
"width": null
|
||
}
|
||
},
|
||
"4e25db89a38944f1bf63427969d90caa": {
|
||
"model_module": "@jupyter-widgets/controls",
|
||
"model_name": "DescriptionStyleModel",
|
||
"model_module_version": "1.5.0",
|
||
"state": {
|
||
"_model_module": "@jupyter-widgets/controls",
|
||
"_model_module_version": "1.5.0",
|
||
"_model_name": "DescriptionStyleModel",
|
||
"_view_count": null,
|
||
"_view_module": "@jupyter-widgets/base",
|
||
"_view_module_version": "1.2.0",
|
||
"_view_name": "StyleView",
|
||
"description_width": ""
|
||
}
|
||
}
|
||
}
|
||
},
|
||
"accelerator": "GPU"
|
||
},
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "view-in-github",
|
||
"colab_type": "text"
|
||
},
|
||
"source": [
|
||
"<a href=\"https://colab.research.google.com/github/sugarforever/Advanced-RAG/blob/main/01_semi_structured_data.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"# Advanced RAG - 01. RAG on Semi-structured data\n",
|
||
"\n",
|
||
"**What is RAG?**\n",
|
||
"\n",
|
||
"Retrieval augmented generation (RAG) is a natural language processing (NLP) technique that employes the capabilities of retrieval and generative based AI models.\n",
|
||
"\n",
|
||
"**What is Naive RAG?**\n",
|
||
"\n",
|
||
"Naive RAG often refers to splitting documents into chunks, embedding them, and retrieving chunks based on semantic similarity search to a user question.\n",
|
||
"\n",
|
||
"It's simple, but of poor overall performance.\n",
|
||
"\n",
|
||
"**That's why we need Advanced RAG.**\n",
|
||
"\n",
|
||
"In this tutorials (**Advanced RAG**), we will learn the techniques and best practices in RAG application development, that can improve the quality of the RAG.\n",
|
||
"\n",
|
||
"It's crucial to the success of a RAG application.\n",
|
||
"\n",
|
||
"## 01. RAG on Semi-structured data\n",
|
||
"\n",
|
||
"### Introduction\n",
|
||
"\n",
|
||
"#### ✏️ What is Structured Data?\n",
|
||
"\n",
|
||
"Structured data is organized information with a predefined format, typically stored in rows and columns, making it easy to search and analyze.\n",
|
||
"\n",
|
||
"#### ✏️ What is Unstructured Data?\n",
|
||
"\n",
|
||
"Unstructured data is information that lacks a specific format or organization, often in the form of text, images, or multimedia, making it challenging to analyze without specialized techniques.\n",
|
||
"\n",
|
||
"#### ✏️ What is Semi-structured Data?\n",
|
||
"\n",
|
||
"Apparently, semi-structured data is the mix of them above.\n",
|
||
"\n",
|
||
"It's challenging for RAG to process semi-structured data, as:\n",
|
||
"\n",
|
||
"1. Text splitting may break up tables\n",
|
||
"2. Tables and images are challenging for embedding and semantic search\n",
|
||
"\n",
|
||
"The typical semi-structured data is PDF document that contains text, tables, images and so on.\n",
|
||
"\n",
|
||
"In this tutorial, let's use the following components to showcase how to build RAG on top of semi-structured data:\n",
|
||
"\n",
|
||
"1. ✂️ [unstructured](https://github.com/Unstructured-IO/unstructured)\n",
|
||
" \n",
|
||
" Open source libraries and APIs to build custom preprocessing pipelines for labeling, training, or production machine learning pipelines.\n",
|
||
"\n",
|
||
" We will use it to parse PDF documents and extract different types of elements seperately, such as text, table, and image\n",
|
||
"\n",
|
||
"2. 🦜 [LangChain](https://github.com/langchain-ai/langchain)\n",
|
||
"\n",
|
||
"3. 🗂 [Chromadb](https://github.com/chroma-core/chroma)\n",
|
||
"\n",
|
||
" Vector data storage\n",
|
||
"\n",
|
||
"The PDF document we use in this example is the [NVIDIA Statement of Changes](https://d18rn0p25nwr6d.cloudfront.net/CIK-0001045810/381953f9-934e-4cc8-b099-144910676bad.pdf). It's a small PDF file containing several tables which is a good example for quick data processing and clear demonstration."
|
||
],
|
||
"metadata": {
|
||
"id": "AeO6Vg59nrPP"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"### Prepare Environment"
|
||
],
|
||
"metadata": {
|
||
"id": "K-e_9LiVuJeQ"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Let's install the necessary Python packages."
|
||
],
|
||
"metadata": {
|
||
"id": "wVP2xyg3uh1r"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "kaIyvvQuB4uu",
|
||
"outputId": "a1a32063-5712-45fc-853b-e6140c4e2542"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||
"\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
||
"llmx 0.0.15a0 requires cohere, which is not installed.\u001b[0m\u001b[31m\n",
|
||
"\u001b[0m"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!pip install langchain unstructured[all-docs] pydantic lxml openai chromadb tiktoken -q -U"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Download the PDF file and name it as `statement_of_changes.pdf`."
|
||
],
|
||
"metadata": {
|
||
"id": "O7276PRIuq4k"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"!wget -O statement_of_changes.pdf https://d18rn0p25nwr6d.cloudfront.net/CIK-0001045810/381953f9-934e-4cc8-b099-144910676bad.pdf"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "BInAbAKNDP55",
|
||
"outputId": "5476a09a-9d04-47d7-d8ff-688c397c49c0"
|
||
},
|
||
"execution_count": 2,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"--2023-11-20 21:19:43-- https://d18rn0p25nwr6d.cloudfront.net/CIK-0001045810/381953f9-934e-4cc8-b099-144910676bad.pdf\n",
|
||
"Resolving d18rn0p25nwr6d.cloudfront.net (d18rn0p25nwr6d.cloudfront.net)... 52.84.122.100, 52.84.122.47, 52.84.122.58, ...\n",
|
||
"Connecting to d18rn0p25nwr6d.cloudfront.net (d18rn0p25nwr6d.cloudfront.net)|52.84.122.100|:443... connected.\n",
|
||
"HTTP request sent, awaiting response... 200 OK\n",
|
||
"Length: 119037 (116K) [application/pdf]\n",
|
||
"Saving to: ‘statement_of_changes.pdf’\n",
|
||
"\n",
|
||
"statement_of_change 100%[===================>] 116.25K --.-KB/s in 0.07s \n",
|
||
"\n",
|
||
"2023-11-20 21:19:44 (1.52 MB/s) - ‘statement_of_changes.pdf’ saved [119037/119037]\n",
|
||
"\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"Install required platform packages:\n",
|
||
"\n",
|
||
"- poppler-utils\n",
|
||
" \n",
|
||
" A collection of command-line utilities built on Poppler's library API, to manage PDF and extract contents\n",
|
||
"\n",
|
||
"- tesseract-ocr\n",
|
||
"\n",
|
||
" Optical character recognition engine"
|
||
],
|
||
"metadata": {
|
||
"id": "SOSkdNt9ux_Z"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"!apt-get install poppler-utils tesseract-ocr"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "ER_h_kAPatZO",
|
||
"outputId": "7c229fc7-d8dd-461f-ae80-88fb79f640d9"
|
||
},
|
||
"execution_count": 3,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Reading package lists... Done\n",
|
||
"Building dependency tree... Done\n",
|
||
"Reading state information... Done\n",
|
||
"The following additional packages will be installed:\n",
|
||
" tesseract-ocr-eng tesseract-ocr-osd\n",
|
||
"The following NEW packages will be installed:\n",
|
||
" poppler-utils tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd\n",
|
||
"0 upgraded, 4 newly installed, 0 to remove and 9 not upgraded.\n",
|
||
"Need to get 5,002 kB of archives.\n",
|
||
"After this operation, 16.3 MB of additional disk space will be used.\n",
|
||
"Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 poppler-utils amd64 22.02.0-2ubuntu0.2 [186 kB]\n",
|
||
"Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]\n",
|
||
"Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]\n",
|
||
"Get:4 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 4.1.1-2.1build1 [236 kB]\n",
|
||
"Fetched 5,002 kB in 1s (3,337 kB/s)\n",
|
||
"Selecting previously unselected package poppler-utils.\n",
|
||
"(Reading database ... 120880 files and directories currently installed.)\n",
|
||
"Preparing to unpack .../poppler-utils_22.02.0-2ubuntu0.2_amd64.deb ...\n",
|
||
"Unpacking poppler-utils (22.02.0-2ubuntu0.2) ...\n",
|
||
"Selecting previously unselected package tesseract-ocr-eng.\n",
|
||
"Preparing to unpack .../tesseract-ocr-eng_1%3a4.00~git30-7274cfa-1.1_all.deb ...\n",
|
||
"Unpacking tesseract-ocr-eng (1:4.00~git30-7274cfa-1.1) ...\n",
|
||
"Selecting previously unselected package tesseract-ocr-osd.\n",
|
||
"Preparing to unpack .../tesseract-ocr-osd_1%3a4.00~git30-7274cfa-1.1_all.deb ...\n",
|
||
"Unpacking tesseract-ocr-osd (1:4.00~git30-7274cfa-1.1) ...\n",
|
||
"Selecting previously unselected package tesseract-ocr.\n",
|
||
"Preparing to unpack .../tesseract-ocr_4.1.1-2.1build1_amd64.deb ...\n",
|
||
"Unpacking tesseract-ocr (4.1.1-2.1build1) ...\n",
|
||
"Setting up tesseract-ocr-eng (1:4.00~git30-7274cfa-1.1) ...\n",
|
||
"Setting up tesseract-ocr-osd (1:4.00~git30-7274cfa-1.1) ...\n",
|
||
"Setting up poppler-utils (22.02.0-2ubuntu0.2) ...\n",
|
||
"Setting up tesseract-ocr (4.1.1-2.1build1) ...\n",
|
||
"Processing triggers for man-db (2.10.2-1) ...\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"import os\n",
|
||
"\n",
|
||
"os.environ[\"OPENAI_API_KEY\"] = \"Your Valid OpenAI API Key\""
|
||
],
|
||
"metadata": {
|
||
"id": "o2lSAfBCxt4w"
|
||
},
|
||
"execution_count": null,
|
||
"outputs": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"### Coding"
|
||
],
|
||
"metadata": {
|
||
"id": "S336PokNwlE-"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"1. Use `unstructured` library to partition the PDF document into different type of elements."
|
||
],
|
||
"metadata": {
|
||
"id": "sGONjGYXw0ot"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from typing import Any\n",
|
||
"\n",
|
||
"from pydantic import BaseModel\n",
|
||
"from unstructured.partition.pdf import partition_pdf\n",
|
||
"\n",
|
||
"raw_pdf_elements = partition_pdf(\n",
|
||
" filename=\"statement_of_changes.pdf\",\n",
|
||
" extract_images_in_pdf=False,\n",
|
||
" infer_table_structure=True,\n",
|
||
" chunking_strategy=\"by_title\",\n",
|
||
" max_characters=4000,\n",
|
||
" new_after_n_chars=3800,\n",
|
||
" combine_text_under_n_chars=2000,\n",
|
||
" image_output_dir_path=\".\",\n",
|
||
")"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 312,
|
||
"referenced_widgets": [
|
||
"ac0fab4eb8e543c1a0f5b037e0235815",
|
||
"3649dec7e417482091fc2f86939e0c16",
|
||
"aa165def4267499fa5bd66c7a23aa83d",
|
||
"4ee55dbff1674ba9bdb0614692dc64e9",
|
||
"54445f16b2e5439dbea3d25c7d690933",
|
||
"b240ed924ce94639b137552549ffd29e",
|
||
"c0f954fb5893438fbc5306ba55d44c20",
|
||
"cf8f1809505a436c949d936e74f93094",
|
||
"7639470b337e47b0b7a60d32c8770744",
|
||
"17913e64f28b4a2daf9f9d21654c6372",
|
||
"ef27bad8255f447da09541fc10f80ad8",
|
||
"8ee21ab783c543c0944e8732faaebb4a",
|
||
"2024899dae44406a9e187c5d770f873f",
|
||
"2174f17cbb124ab2bf1a32765c0d5a0c",
|
||
"330603dd11944ad9a267378e3a17a573",
|
||
"674ae68fb32d40b48152f29f77636138",
|
||
"c1cd34e355a540f58cebe58990bee350",
|
||
"2248e9a15d694e7f94b6d817a0bceb90",
|
||
"2477901c30b44069ac5db5cf4e8924f0",
|
||
"f2d4643e36634b668eb2d749d74e7780",
|
||
"f9a39e14cb0140e8ac09cfb400344804",
|
||
"9e0916f00d374a858a3edb397a63af8d",
|
||
"5b720610ad684d5abe3ca6f3acdf6606",
|
||
"127c410899934087904875f527a2dbba",
|
||
"2f9d37688cfa46059f152c0e6b080904",
|
||
"cd3f489a29c8448c9a01f595690c0e48",
|
||
"f9056446dd594a2aaec5396995a5f065",
|
||
"42efb45824894d808c9e31ef7554a852",
|
||
"181f2590b9464b929bf9b4e3b5c88b5c",
|
||
"4f40596f290146d8b53dc3dda50a09e7",
|
||
"9869010a1cb949f7ae1feeef095fc05e",
|
||
"1cc0b1752a704fa0b8687d0411539055",
|
||
"089092011cf341379b9be17a052830f1",
|
||
"f4321c31f68a458382b2b8b969993831",
|
||
"e8ed92210b654289a7732b2e76f8dcad",
|
||
"7428d3cc85f148fc8659b2f7e5e1afe6",
|
||
"c34b003c7d524da0bb39e8c8cfe63d44",
|
||
"8fd5fbbf855c43428ff5253adda6a4b3",
|
||
"f623dbd8c9b5462bab46250eb7d492e6",
|
||
"bb8ab670115c46378f448a7ca93726e2",
|
||
"dc74c156b5234141934265b417a5ffcb",
|
||
"d18a0ce1bd7149d388d12cdd86c3390c",
|
||
"fb00943087d0426fad871d9d6ed68a2a",
|
||
"4e25db89a38944f1bf63427969d90caa"
|
||
]
|
||
},
|
||
"id": "nFpMgbR6Csj6",
|
||
"outputId": "2782369b-ab1e-4677-a542-e1080744c1eb"
|
||
},
|
||
"execution_count": 4,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stderr",
|
||
"text": [
|
||
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
|
||
"[nltk_data] Unzipping tokenizers/punkt.zip.\n",
|
||
"[nltk_data] Downloading package averaged_perceptron_tagger to\n",
|
||
"[nltk_data] /root/nltk_data...\n",
|
||
"[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n"
|
||
]
|
||
},
|
||
{
|
||
"output_type": "display_data",
|
||
"data": {
|
||
"text/plain": [
|
||
"yolox_l0.05.onnx: 0%| | 0.00/217M [00:00<?, ?B/s]"
|
||
],
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"version_major": 2,
|
||
"version_minor": 0,
|
||
"model_id": "ac0fab4eb8e543c1a0f5b037e0235815"
|
||
}
|
||
},
|
||
"metadata": {}
|
||
},
|
||
{
|
||
"output_type": "display_data",
|
||
"data": {
|
||
"text/plain": [
|
||
"config.json: 0%| | 0.00/1.47k [00:00<?, ?B/s]"
|
||
],
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"version_major": 2,
|
||
"version_minor": 0,
|
||
"model_id": "8ee21ab783c543c0944e8732faaebb4a"
|
||
}
|
||
},
|
||
"metadata": {}
|
||
},
|
||
{
|
||
"output_type": "display_data",
|
||
"data": {
|
||
"text/plain": [
|
||
"model.safetensors: 0%| | 0.00/115M [00:00<?, ?B/s]"
|
||
],
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"version_major": 2,
|
||
"version_minor": 0,
|
||
"model_id": "5b720610ad684d5abe3ca6f3acdf6606"
|
||
}
|
||
},
|
||
"metadata": {}
|
||
},
|
||
{
|
||
"output_type": "display_data",
|
||
"data": {
|
||
"text/plain": [
|
||
"model.safetensors: 0%| | 0.00/46.8M [00:00<?, ?B/s]"
|
||
],
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"version_major": 2,
|
||
"version_minor": 0,
|
||
"model_id": "f4321c31f68a458382b2b8b969993831"
|
||
}
|
||
},
|
||
"metadata": {}
|
||
},
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stderr",
|
||
"text": [
|
||
"Some weights of the model checkpoint at microsoft/table-transformer-structure-recognition were not used when initializing TableTransformerForObjectDetection: ['model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked']\n",
|
||
"- This IS expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
||
"- This IS NOT expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"2. Categorize the elements"
|
||
],
|
||
"metadata": {
|
||
"id": "cv8Tw7ETw90G"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"category_counts = {}\n",
|
||
"\n",
|
||
"for element in raw_pdf_elements:\n",
|
||
" category = str(type(element))\n",
|
||
" if category in category_counts:\n",
|
||
" category_counts[category] += 1\n",
|
||
" else:\n",
|
||
" category_counts[category] = 1\n",
|
||
"\n",
|
||
"unique_categories = set(category_counts.keys())\n",
|
||
"category_counts"
|
||
],
|
||
"metadata": {
|
||
"id": "0ofgTIuNDkLo",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"outputId": "06c8b468-a500-461e-f1d3-0acfb8be6af7"
|
||
},
|
||
"execution_count": 30,
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"{\"<class 'unstructured.documents.elements.CompositeElement'>\": 5,\n",
|
||
" \"<class 'unstructured.documents.elements.Table'>\": 4}"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 30
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"class Element(BaseModel):\n",
|
||
" type: str\n",
|
||
" text: Any\n",
|
||
"\n",
|
||
"table_elements = []\n",
|
||
"text_elements = []\n",
|
||
"for element in raw_pdf_elements:\n",
|
||
" if \"unstructured.documents.elements.Table\" in str(type(element)):\n",
|
||
" table_elements.append(Element(type=\"table\", text=str(element)))\n",
|
||
" elif \"unstructured.documents.elements.CompositeElement\" in str(type(element)):\n",
|
||
" text_elements.append(Element(type=\"text\", text=str(element)))"
|
||
],
|
||
"metadata": {
|
||
"id": "alMtzOivbJXo"
|
||
},
|
||
"execution_count": 31,
|
||
"outputs": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(len(table_elements))\n",
|
||
"print(len(text_elements))"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "4rGCd4O5xlSh",
|
||
"outputId": "acd62e39-97cb-4400-f1d9-452eeafcd541"
|
||
},
|
||
"execution_count": 32,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"4\n",
|
||
"5\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"table_elements[0]"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "ld91YQP3nKTd",
|
||
"outputId": "a71da61e-39e6-4c4a-bcf4-1cbde6c0e26a"
|
||
},
|
||
"execution_count": 33,
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"Element(type='table', text='1. Name and Address of Reporting Person = \\\\Drell Persis 2. Issuer Name and Ticker or Trading Symbol INVIDIA CORP [ NVDA ] 5. Relationship of Reporting Person(s) to Issuer (Check all applicable) 3. Date of Earliest Transaction (MM/DD/YYYY) |_X_Director 10% Owner (es) (Fis) (Middle) Officer (give title below) Other (specify below) C/O NVIDIA CORPORATION, 2788 10/6/2023 SAN TOMAS EXPRESSWAY (Street) 4. If Amendment, Date Original Filed (MM/DD/YYYY) 6. Individual or Joint/Group Filing (Check Applicable Line) SANTA CLARA, CA 95051 | X_ Form filed by One Reporting Person - |__ Form filed by More than One Reporting Person (City) (State) (Zip)')"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 33
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"table_elements[2]"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "DHThHvBWnT87",
|
||
"outputId": "154f0173-8cf3-4e7f-ef90-ee3a1befd2d2"
|
||
},
|
||
"execution_count": 34,
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"Element(type='table', text='1. Title of Derivate |2. 3. Trans. 3A. Deemed |4. Trans. Code |5. Number of 6. Date Exercisable 7. Title and Amount of 8. Price of ]9. Number of | 10. Security Conversion | Date Execution |(Instr, 8) Derivative Securities |and Expiration Date _| Securities Underlying Derivative }derivative | Ownership] (Instr. 3) or Exercise Date, if any Acquired (A) or Derivative Security Security |Securities |Formof Price of Disposed of (D) (Instr. 3 and 4) (Instr. 5) |Beneficially | Derivative | Derivative (Instr. 3, 4 and 5) Owned Security: | Security Following —_| Direct (D) - Reported _| or Indirect Date Expiration] 1... | Amount or Number of Transaction(s)] (1) (Instr. coe |v | (a) (D) | Exercisable|Date Shares (instr. 4) 4)')"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 34
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from langchain.chat_models import ChatOpenAI\n",
|
||
"from langchain.prompts import ChatPromptTemplate\n",
|
||
"from langchain.schema.output_parser import StrOutputParser"
|
||
],
|
||
"metadata": {
|
||
"id": "he7fQp5qbLnu"
|
||
},
|
||
"execution_count": 35,
|
||
"outputs": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"3. Build up summarization chain with LangChain framework"
|
||
],
|
||
"metadata": {
|
||
"id": "1hBf_lftySxt"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Prompt\n",
|
||
"prompt_text = \"\"\"\n",
|
||
" You are responsible for concisely summarizing table or text chunk:\n",
|
||
"\n",
|
||
" {element}\n",
|
||
"\"\"\"\n",
|
||
"prompt = ChatPromptTemplate.from_template(prompt_text)\n",
|
||
"\n",
|
||
"# Summarization chain\n",
|
||
"model = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
|
||
"summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()"
|
||
],
|
||
"metadata": {
|
||
"id": "uDQYbnKDbM7C"
|
||
},
|
||
"execution_count": 36,
|
||
"outputs": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"4. Summarize each text and table element"
|
||
],
|
||
"metadata": {
|
||
"id": "TikuTLiKyXy3"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Apply to tables\n",
|
||
"tables = [i.text for i in table_elements]\n",
|
||
"table_summaries = summarize_chain.batch(tables, {\"max_concurrency\": 5})\n",
|
||
"# Apply to texts\n",
|
||
"texts = [i.text for i in text_elements]\n",
|
||
"text_summaries = summarize_chain.batch(texts, {\"max_concurrency\": 5})"
|
||
],
|
||
"metadata": {
|
||
"id": "SMqqogGDbOk_"
|
||
},
|
||
"execution_count": 12,
|
||
"outputs": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"5. Use LangChain MultiVectorRetriever to associate summaries of tables and texts with original text chunks in parent-child relationship."
|
||
],
|
||
"metadata": {
|
||
"id": "PskDZuy7ydfV"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"import uuid\n",
|
||
"\n",
|
||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
|
||
"from langchain.schema.document import Document\n",
|
||
"from langchain.storage import InMemoryStore\n",
|
||
"from langchain.vectorstores import Chroma\n",
|
||
"\n",
|
||
"# The vectorstore to use to index the child chunks\n",
|
||
"vectorstore = Chroma(collection_name=\"summaries\", embedding_function=OpenAIEmbeddings())\n",
|
||
"\n",
|
||
"# The storage layer for the parent documents\n",
|
||
"store = InMemoryStore()\n",
|
||
"id_key = \"doc_id\"\n",
|
||
"\n",
|
||
"# The retriever (empty to start)\n",
|
||
"retriever = MultiVectorRetriever(\n",
|
||
" vectorstore=vectorstore,\n",
|
||
" docstore=store,\n",
|
||
" id_key=id_key,\n",
|
||
")\n",
|
||
"\n",
|
||
"# Add texts\n",
|
||
"doc_ids = [str(uuid.uuid4()) for _ in texts]\n",
|
||
"summary_texts = [\n",
|
||
" Document(page_content=s, metadata={id_key: doc_ids[i]})\n",
|
||
" for i, s in enumerate(text_summaries)\n",
|
||
"]\n",
|
||
"retriever.vectorstore.add_documents(summary_texts)\n",
|
||
"retriever.docstore.mset(list(zip(doc_ids, texts)))\n",
|
||
"\n",
|
||
"# Add tables\n",
|
||
"table_ids = [str(uuid.uuid4()) for _ in tables]\n",
|
||
"summary_tables = [\n",
|
||
" Document(page_content=s, metadata={id_key: table_ids[i]})\n",
|
||
" for i, s in enumerate(table_summaries)\n",
|
||
"]\n",
|
||
"retriever.vectorstore.add_documents(summary_tables)\n",
|
||
"retriever.docstore.mset(list(zip(table_ids, tables)))"
|
||
],
|
||
"metadata": {
|
||
"id": "-deJeuO4bRSZ"
|
||
},
|
||
"execution_count": 19,
|
||
"outputs": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from langchain.schema.runnable import RunnablePassthrough\n",
|
||
"\n",
|
||
"# Prompt template\n",
|
||
"template = \"\"\"Answer the question based only on the following context, which can include text and tables:\n",
|
||
"{context}\n",
|
||
"Question: {question}\n",
|
||
"\"\"\"\n",
|
||
"prompt = ChatPromptTemplate.from_template(template)\n",
|
||
"\n",
|
||
"# LLM\n",
|
||
"model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
|
||
"\n",
|
||
"# RAG pipeline\n",
|
||
"chain = (\n",
|
||
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
|
||
" | prompt\n",
|
||
" | model\n",
|
||
" | StrOutputParser()\n",
|
||
")"
|
||
],
|
||
"metadata": {
|
||
"id": "NuZzXm_kbTK7"
|
||
},
|
||
"execution_count": 37,
|
||
"outputs": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"chain.invoke(\"How many stocks were disposed? Who is the beneficial owner?\")"
|
||
],
|
||
"metadata": {
|
||
"id": "wAEyfvmabU3u",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 36
|
||
},
|
||
"outputId": "8ba7f132-32c0-455c-c6b5-453a40d4f60b"
|
||
},
|
||
"execution_count": 38,
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"'2300 stocks were disposed. The beneficial owner is the Welch-Drell 2009 Revocable Trust.'"
|
||
],
|
||
"application/vnd.google.colaboratory.intrinsic+json": {
|
||
"type": "string"
|
||
}
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 38
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"6. Experiment with GPT-3.5\n",
|
||
"\n",
|
||
"Looks it doesn't perform as well as GPT-4."
|
||
],
|
||
"metadata": {
|
||
"id": "Jbr8tZjuzGZf"
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# RAG pipeline\n",
|
||
"chain = (\n",
|
||
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
|
||
" | prompt\n",
|
||
" | ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
|
||
" | StrOutputParser()\n",
|
||
")\n",
|
||
"chain.invoke(\"How many stocks were disposed? Who is the beneficial owner?\")"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 54
|
||
},
|
||
"id": "YWnJywytzPNt",
|
||
"outputId": "453ac524-4528-4875-981d-b016b84ef72b"
|
||
},
|
||
"execution_count": 39,
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
"'Based on the given context, it is not possible to determine how many stocks were disposed or who the beneficial owner is. The context does not provide any specific information about the disposal of stocks or the identification of the beneficial owner.'"
|
||
],
|
||
"application/vnd.google.colaboratory.intrinsic+json": {
|
||
"type": "string"
|
||
}
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 39
|
||
}
|
||
]
|
||
}
|
||
]
|
||
} |