This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
soft-analytics-02/models/baris/model_0.1.ipynb
Claudio Maggioni a4ceee8716 Final version of the project
History has been rewritten to delete large files in repo
2024-01-03 15:28:43 +01:00

3250 lines
No EOL
123 KiB
Text
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"machine_shape": "hm",
"gpuType": "V100"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"f34b3a23068f4083bf99d857321c1e28": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_cf038043fe0b4e128dabf84f538a0902",
"IPY_MODEL_f66e31747591402595daf4c8dae393d9",
"IPY_MODEL_a901272b1b1b40728119a811f7fc2501"
],
"layout": "IPY_MODEL_d8e16d601ea34fb79081e19fcf3699b5"
}
},
"cf038043fe0b4e128dabf84f538a0902": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1582cc9c968b442fa9cc47f5ca19fd22",
"placeholder": "",
"style": "IPY_MODEL_3f93157aaf1548d784d8fe7f5378e58f",
"value": "tokenizer_config.json: 100%"
}
},
"f66e31747591402595daf4c8dae393d9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_acd998de0bd84481bc08a2c2517cfa0d",
"max": 1477,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_0b9230adec7846ebbac6b941107fa914",
"value": 1477
}
},
"a901272b1b1b40728119a811f7fc2501": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_84b66e717aad49c1abce3950fe2a4a3a",
"placeholder": "",
"style": "IPY_MODEL_2fbc4db8c1274a438e03cfca03756c64",
"value": " 1.48k/1.48k [00:00<00:00, 124kB/s]"
}
},
"d8e16d601ea34fb79081e19fcf3699b5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1582cc9c968b442fa9cc47f5ca19fd22": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3f93157aaf1548d784d8fe7f5378e58f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"acd998de0bd84481bc08a2c2517cfa0d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0b9230adec7846ebbac6b941107fa914": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"84b66e717aad49c1abce3950fe2a4a3a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2fbc4db8c1274a438e03cfca03756c64": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"d6c23b4bbd3d4effabb69ba0c863d5b5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_ae259bc4c5bf4d46b20bd4d3e53d458e",
"IPY_MODEL_53eabf2da0c449f29af023583b9edc7f",
"IPY_MODEL_85ae730bf64e49778a6ebc9cef16b08e"
],
"layout": "IPY_MODEL_6354db35dcd9477cbfc13d41bca93f7e"
}
},
"ae259bc4c5bf4d46b20bd4d3e53d458e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_67499569ee804875801fdef011dfc5a6",
"placeholder": "",
"style": "IPY_MODEL_17ca156e929649a38cb88fb7f6b273c9",
"value": "vocab.json: 100%"
}
},
"53eabf2da0c449f29af023583b9edc7f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_37690a2525f442c4a27b573534fc81ff",
"max": 703051,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_e6b24cd85f674906a0bdcb0bb5c73713",
"value": 703051
}
},
"85ae730bf64e49778a6ebc9cef16b08e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_152ae0c3fca34a558958f50c7a8dd4d7",
"placeholder": "",
"style": "IPY_MODEL_59e1540eb17f4a8eab4b3e8ade5eed95",
"value": " 703k/703k [00:00<00:00, 4.09MB/s]"
}
},
"6354db35dcd9477cbfc13d41bca93f7e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"67499569ee804875801fdef011dfc5a6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"17ca156e929649a38cb88fb7f6b273c9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"37690a2525f442c4a27b573534fc81ff": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e6b24cd85f674906a0bdcb0bb5c73713": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"152ae0c3fca34a558958f50c7a8dd4d7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"59e1540eb17f4a8eab4b3e8ade5eed95": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"c3c4524ceeac426ab956b15db34b7f35": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_97f3309a7be940639c0b30f3fae9b34c",
"IPY_MODEL_cf3dafc801f14ce2abd50af3231962cc",
"IPY_MODEL_3c237d0b50464d87a4cbc105cfa88c5b"
],
"layout": "IPY_MODEL_0cdca5aed61b47eebda82f495e7e961d"
}
},
"97f3309a7be940639c0b30f3fae9b34c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f864352b0ac949448b643935e2749b16",
"placeholder": "",
"style": "IPY_MODEL_eb4ceb855c6643a09d752f3792347e15",
"value": "merges.txt: 100%"
}
},
"cf3dafc801f14ce2abd50af3231962cc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_aed5a35352a24fd0a638fdfbb27b774f",
"max": 294364,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_2ef4560e3b6b41ad93310b087563aaa2",
"value": 294364
}
},
"3c237d0b50464d87a4cbc105cfa88c5b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_d095d517f247490f8e87df5f7a0946d9",
"placeholder": "",
"style": "IPY_MODEL_a86a3aaf700d46d2a06eb5d5c509c3de",
"value": " 294k/294k [00:00<00:00, 1.20MB/s]"
}
},
"0cdca5aed61b47eebda82f495e7e961d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f864352b0ac949448b643935e2749b16": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"eb4ceb855c6643a09d752f3792347e15": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"aed5a35352a24fd0a638fdfbb27b774f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2ef4560e3b6b41ad93310b087563aaa2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"d095d517f247490f8e87df5f7a0946d9": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a86a3aaf700d46d2a06eb5d5c509c3de": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"980d90e972c84d6f9528af0b2da1a06c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_f41b2b23eb8e4add8352fc7221896432",
"IPY_MODEL_7b6df6581f9a4621b52713464b0cee07",
"IPY_MODEL_24f31b1c6c6b4a5cb21294c083bf7b9a"
],
"layout": "IPY_MODEL_fe9ba5752047490baf7dc0519248220f"
}
},
"f41b2b23eb8e4add8352fc7221896432": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a3fe0bfba6244f3bbb57ecc5cfd2be4f",
"placeholder": "",
"style": "IPY_MODEL_10b6501e4a7b4bdfb1e8e6b8d925feca",
"value": "added_tokens.json: 100%"
}
},
"7b6df6581f9a4621b52713464b0cee07": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_19c5b1a447984095a74d06d6f14ff84c",
"max": 2,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_b78ab8a182674478ba4034c16098c0b2",
"value": 2
}
},
"24f31b1c6c6b4a5cb21294c083bf7b9a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_da6d73dce8c24dc8a5868a878372e801",
"placeholder": "",
"style": "IPY_MODEL_d711eaaec353467c9e098f2dda8dbdda",
"value": " 2.00/2.00 [00:00<00:00, 161B/s]"
}
},
"fe9ba5752047490baf7dc0519248220f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a3fe0bfba6244f3bbb57ecc5cfd2be4f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"10b6501e4a7b4bdfb1e8e6b8d925feca": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"19c5b1a447984095a74d06d6f14ff84c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b78ab8a182674478ba4034c16098c0b2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"da6d73dce8c24dc8a5868a878372e801": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d711eaaec353467c9e098f2dda8dbdda": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cd532737227248aead6827297b1eabf2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_50a1e8ca5759454d84b3a3c24be49754",
"IPY_MODEL_d0e649389b5346f4bf7f59e7d99a7731",
"IPY_MODEL_9ea4fae1c1b34c06b40438930d99b9eb"
],
"layout": "IPY_MODEL_8477e51ce81145bfb0af94cd410dbcec"
}
},
"50a1e8ca5759454d84b3a3c24be49754": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_638c197a303a45ec94045299ba045aa4",
"placeholder": "",
"style": "IPY_MODEL_6f3417b756cb49889f0df17f3d5eb7aa",
"value": "special_tokens_map.json: 100%"
}
},
"d0e649389b5346f4bf7f59e7d99a7731": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_4f803f6d553d4089b5988f8cd0ec6d10",
"max": 12512,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_ca2bfd2324f043ecbc3797960f0d4a36",
"value": 12512
}
},
"9ea4fae1c1b34c06b40438930d99b9eb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_45cf5a3287c046b6ab4d7a5906d21f6f",
"placeholder": "",
"style": "IPY_MODEL_b508129f8c104d0ebcb9a2e085c7f416",
"value": " 12.5k/12.5k [00:00<00:00, 1.03MB/s]"
}
},
"8477e51ce81145bfb0af94cd410dbcec": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"638c197a303a45ec94045299ba045aa4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6f3417b756cb49889f0df17f3d5eb7aa": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"4f803f6d553d4089b5988f8cd0ec6d10": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ca2bfd2324f043ecbc3797960f0d4a36": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"45cf5a3287c046b6ab4d7a5906d21f6f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b508129f8c104d0ebcb9a2e085c7f416": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"32fa576fd18340fa966ae6b80af79540": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_9933d4e61d8549e8a19eaf5b88350397",
"IPY_MODEL_df71a85d2435438ca1b704b7a6396853",
"IPY_MODEL_54ec9c0ec9434f6ca20f7918ddeeaffc"
],
"layout": "IPY_MODEL_e8fcc59f32384456b706ec20976df9b4"
}
},
"9933d4e61d8549e8a19eaf5b88350397": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f894552397a4482cba389b46d7952370",
"placeholder": "",
"style": "IPY_MODEL_183ca2a1d51f4c0a8cefa10ea827d0ed",
"value": "config.json: 100%"
}
},
"df71a85d2435438ca1b704b7a6396853": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_48ad95b4094a45f29d77be2c61b02517",
"max": 1568,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_c87b9cb7a0ac48e5bfa5f1c5b00dc4a3",
"value": 1568
}
},
"54ec9c0ec9434f6ca20f7918ddeeaffc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6523a4d8a51f43588cb2bf6e31af3d14",
"placeholder": "",
"style": "IPY_MODEL_8bbccf6e15934db5821786ffaf79b919",
"value": " 1.57k/1.57k [00:00<00:00, 136kB/s]"
}
},
"e8fcc59f32384456b706ec20976df9b4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f894552397a4482cba389b46d7952370": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"183ca2a1d51f4c0a8cefa10ea827d0ed": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"48ad95b4094a45f29d77be2c61b02517": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c87b9cb7a0ac48e5bfa5f1c5b00dc4a3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"6523a4d8a51f43588cb2bf6e31af3d14": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8bbccf6e15934db5821786ffaf79b919": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"139e2d21a4ad45aa8b708b8d40bc9860": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_980100e4f5a549f68f087ef7d72e4b36",
"IPY_MODEL_82cf354c1be145a9ae8f288a2b047a46",
"IPY_MODEL_e63e0187150e4a48ab132ddf468f68b1"
],
"layout": "IPY_MODEL_7720ba493849492c9bac5b662fd1ebb0"
}
},
"980100e4f5a549f68f087ef7d72e4b36": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_d8b981ea03ec4e71a1869aa38da01d32",
"placeholder": "",
"style": "IPY_MODEL_e76c83504dbe46868787036cd898acfb",
"value": "pytorch_model.bin: 100%"
}
},
"82cf354c1be145a9ae8f288a2b047a46": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8a995129a7a0428e8cd1b6ac577da285",
"max": 891641279,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_6d83432ae4aa432a81d20f3041266e6f",
"value": 891641279
}
},
"e63e0187150e4a48ab132ddf468f68b1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_eaa107ccb3f4490fbe688cab38661aa8",
"placeholder": "",
"style": "IPY_MODEL_50e235b9eea64337b7187e338526b685",
"value": " 892M/892M [00:02<00:00, 354MB/s]"
}
},
"7720ba493849492c9bac5b662fd1ebb0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d8b981ea03ec4e71a1869aa38da01d32": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e76c83504dbe46868787036cd898acfb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"8a995129a7a0428e8cd1b6ac577da285": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6d83432ae4aa432a81d20f3041266e6f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"eaa107ccb3f4490fbe688cab38661aa8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"50e235b9eea64337b7187e338526b685": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from transformers import RobertaTokenizer, T5ForConditionalGeneration, DataCollatorForLanguageModeling\n",
"from sklearn.model_selection import train_test_split\n",
"from torch.utils.data import Dataset, DataLoader\n",
"import torch\n",
"\n",
"# Step 1: Read the Parquet file into a Pandas DataFrame\n",
"df = pd.read_parquet('/content/drive/MyDrive/analytics/if/functions.pq')\n",
"df = df.head(5000)\n",
"# Step 2: Split the DataFrame into two parts for pre-training and fine-tuning\n",
"pretrain_df, fine_tune_df = train_test_split(df, test_size=0.5, random_state=42)\n",
"\n",
"# Step 3: Further split the fine-tuning DataFrame for evaluation and test sets\n",
"eval_df = fine_tune_df.sample(frac=0.1, random_state=42)\n",
"test_df = fine_tune_df.drop(eval_df.index).sample(frac=0.1111, random_state=42) # Ensures 10% of the original fine_tune_df\n",
"fine_tune_df = fine_tune_df.drop(eval_df.index).drop(test_df.index)\n",
"\n",
"# Ensure no overlap between eval_df and test_df\n",
"assert len(set(eval_df.index).intersection(set(test_df.index))) == 0\n"
],
"metadata": {
"id": "osciDyaEIkyL"
},
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 4: Prepare the Data for the Model\n",
"tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base')\n",
"model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base')\n",
"\n",
"# Define the dataset class\n",
"class PythonCodeDataset(Dataset):\n",
" def __init__(self, tokenizer, dataframe, max_len=512):\n",
" self.tokenizer = tokenizer\n",
" self.data = dataframe\n",
" self.max_len = max_len\n",
"\n",
" def __len__(self):\n",
" return len(self.data)\n",
"\n",
" def __getitem__(self, index):\n",
" code = self.data.iloc[index]['source']\n",
" inputs = self.tokenizer.encode_plus(\n",
" code,\n",
" None,\n",
" add_special_tokens=True,\n",
" max_length=self.max_len,\n",
" padding='max_length',\n",
" return_token_type_ids=True,\n",
" truncation=True\n",
" )\n",
" input_ids = inputs['input_ids']\n",
" attention_mask = inputs['attention_mask']\n",
"\n",
" return {\n",
" 'input_ids': torch.tensor(input_ids, dtype=torch.long),\n",
" 'attention_mask': torch.tensor(attention_mask, dtype=torch.long)\n",
" }\n",
"\n",
"# Instantiate the dataset for pre-training\n",
"pretrain_dataset = PythonCodeDataset(tokenizer, pretrain_df)\n",
"\n",
"# Set up the data collator for MLM\n",
"data_collator = DataCollatorForLanguageModeling(\n",
" tokenizer=tokenizer,\n",
" mlm=True,\n",
" mlm_probability=0.15\n",
")\n",
"\n",
"# Create a DataLoader for pre-training\n",
"pretrain_loader = DataLoader(pretrain_dataset, batch_size=8, shuffle=True, collate_fn=data_collator)\n",
"\n",
"# Define the pre-training loop\n",
"def pretrain(model, dataloader, epochs, print_every=10):\n",
" model.train()\n",
" optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)\n",
" global_step = 0 # Initialize a counter for the global training step\n",
"\n",
" for epoch in range(epochs):\n",
" for batch in dataloader:\n",
" optimizer.zero_grad()\n",
" inputs = {'input_ids': batch['input_ids'], 'attention_mask': batch['attention_mask']}\n",
" outputs = model(**inputs, labels=batch['input_ids'])\n",
" loss = outputs.loss\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" if global_step % print_every == 0: # Print every steps\n",
" print(f\"Step {global_step}, Loss: {loss.item()}\")\n",
"\n",
" global_step += 1 # Increment the step counter\n",
"\n",
" print(f\"Epoch {epoch+1}/{epochs} completed.\")\n",
"\n",
"# Pre-train the model\n",
"pretrain(model, pretrain_loader, epochs=1)\n",
"\n",
"# Save the pre-trained model\n",
"#model.save_pretrained('/content/drive/MyDrive/analytics/if/pretrained_model')\n",
"\n",
"# TODO: Implement the fine-tuning part with special token masking a single if condition\n",
"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 914,
"referenced_widgets": [
"f34b3a23068f4083bf99d857321c1e28",
"cf038043fe0b4e128dabf84f538a0902",
"f66e31747591402595daf4c8dae393d9",
"a901272b1b1b40728119a811f7fc2501",
"d8e16d601ea34fb79081e19fcf3699b5",
"1582cc9c968b442fa9cc47f5ca19fd22",
"3f93157aaf1548d784d8fe7f5378e58f",
"acd998de0bd84481bc08a2c2517cfa0d",
"0b9230adec7846ebbac6b941107fa914",
"84b66e717aad49c1abce3950fe2a4a3a",
"2fbc4db8c1274a438e03cfca03756c64",
"d6c23b4bbd3d4effabb69ba0c863d5b5",
"ae259bc4c5bf4d46b20bd4d3e53d458e",
"53eabf2da0c449f29af023583b9edc7f",
"85ae730bf64e49778a6ebc9cef16b08e",
"6354db35dcd9477cbfc13d41bca93f7e",
"67499569ee804875801fdef011dfc5a6",
"17ca156e929649a38cb88fb7f6b273c9",
"37690a2525f442c4a27b573534fc81ff",
"e6b24cd85f674906a0bdcb0bb5c73713",
"152ae0c3fca34a558958f50c7a8dd4d7",
"59e1540eb17f4a8eab4b3e8ade5eed95",
"c3c4524ceeac426ab956b15db34b7f35",
"97f3309a7be940639c0b30f3fae9b34c",
"cf3dafc801f14ce2abd50af3231962cc",
"3c237d0b50464d87a4cbc105cfa88c5b",
"0cdca5aed61b47eebda82f495e7e961d",
"f864352b0ac949448b643935e2749b16",
"eb4ceb855c6643a09d752f3792347e15",
"aed5a35352a24fd0a638fdfbb27b774f",
"2ef4560e3b6b41ad93310b087563aaa2",
"d095d517f247490f8e87df5f7a0946d9",
"a86a3aaf700d46d2a06eb5d5c509c3de",
"980d90e972c84d6f9528af0b2da1a06c",
"f41b2b23eb8e4add8352fc7221896432",
"7b6df6581f9a4621b52713464b0cee07",
"24f31b1c6c6b4a5cb21294c083bf7b9a",
"fe9ba5752047490baf7dc0519248220f",
"a3fe0bfba6244f3bbb57ecc5cfd2be4f",
"10b6501e4a7b4bdfb1e8e6b8d925feca",
"19c5b1a447984095a74d06d6f14ff84c",
"b78ab8a182674478ba4034c16098c0b2",
"da6d73dce8c24dc8a5868a878372e801",
"d711eaaec353467c9e098f2dda8dbdda",
"cd532737227248aead6827297b1eabf2",
"50a1e8ca5759454d84b3a3c24be49754",
"d0e649389b5346f4bf7f59e7d99a7731",
"9ea4fae1c1b34c06b40438930d99b9eb",
"8477e51ce81145bfb0af94cd410dbcec",
"638c197a303a45ec94045299ba045aa4",
"6f3417b756cb49889f0df17f3d5eb7aa",
"4f803f6d553d4089b5988f8cd0ec6d10",
"ca2bfd2324f043ecbc3797960f0d4a36",
"45cf5a3287c046b6ab4d7a5906d21f6f",
"b508129f8c104d0ebcb9a2e085c7f416",
"32fa576fd18340fa966ae6b80af79540",
"9933d4e61d8549e8a19eaf5b88350397",
"df71a85d2435438ca1b704b7a6396853",
"54ec9c0ec9434f6ca20f7918ddeeaffc",
"e8fcc59f32384456b706ec20976df9b4",
"f894552397a4482cba389b46d7952370",
"183ca2a1d51f4c0a8cefa10ea827d0ed",
"48ad95b4094a45f29d77be2c61b02517",
"c87b9cb7a0ac48e5bfa5f1c5b00dc4a3",
"6523a4d8a51f43588cb2bf6e31af3d14",
"8bbccf6e15934db5821786ffaf79b919",
"139e2d21a4ad45aa8b708b8d40bc9860",
"980100e4f5a549f68f087ef7d72e4b36",
"82cf354c1be145a9ae8f288a2b047a46",
"e63e0187150e4a48ab132ddf468f68b1",
"7720ba493849492c9bac5b662fd1ebb0",
"d8b981ea03ec4e71a1869aa38da01d32",
"e76c83504dbe46868787036cd898acfb",
"8a995129a7a0428e8cd1b6ac577da285",
"6d83432ae4aa432a81d20f3041266e6f",
"eaa107ccb3f4490fbe688cab38661aa8",
"50e235b9eea64337b7187e338526b685"
]
},
"id": "wE2cXvhRRHTz",
"outputId": "65adcc96-49d1-48c4-845c-dd79281fcf61"
},
"execution_count": 2,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/1.48k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "f34b3a23068f4083bf99d857321c1e28"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"vocab.json: 0%| | 0.00/703k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "d6c23b4bbd3d4effabb69ba0c863d5b5"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"merges.txt: 0%| | 0.00/294k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "c3c4524ceeac426ab956b15db34b7f35"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"added_tokens.json: 0%| | 0.00/2.00 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "980d90e972c84d6f9528af0b2da1a06c"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/12.5k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "cd532737227248aead6827297b1eabf2"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"config.json: 0%| | 0.00/1.57k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "32fa576fd18340fa966ae6b80af79540"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"pytorch_model.bin: 0%| | 0.00/892M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "139e2d21a4ad45aa8b708b8d40bc9860"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Step 0, Loss: 4.4537835121154785\n",
"Step 10, Loss: 0.7301594018936157\n",
"Step 20, Loss: 0.20075637102127075\n",
"Step 30, Loss: 0.2760685086250305\n",
"Step 40, Loss: 0.06050005927681923\n",
"Step 50, Loss: 0.10328269004821777\n",
"Step 60, Loss: 0.07471203804016113\n",
"Step 70, Loss: 0.0644034817814827\n",
"Step 80, Loss: 0.031095409765839577\n",
"Step 90, Loss: 0.04370066151022911\n",
"Step 100, Loss: 0.01667855493724346\n",
"Step 110, Loss: 0.02415052428841591\n",
"Step 120, Loss: 0.04331440106034279\n",
"Step 130, Loss: 0.03492546081542969\n",
"Step 140, Loss: 0.020435431972146034\n",
"Step 150, Loss: 0.020917698740959167\n",
"Step 160, Loss: 0.01400820817798376\n",
"Step 170, Loss: 0.006651030387729406\n",
"Step 180, Loss: 0.01667148619890213\n",
"Step 190, Loss: 0.007088858168572187\n",
"Step 200, Loss: 0.010270923376083374\n",
"Step 210, Loss: 0.058693502098321915\n",
"Step 220, Loss: 0.0021587631199508905\n",
"Step 230, Loss: 0.00282807438634336\n",
"Step 240, Loss: 0.006106184795498848\n",
"Step 250, Loss: 0.009657995775341988\n",
"Step 260, Loss: 0.008495030924677849\n",
"Step 270, Loss: 0.008562863804399967\n",
"Step 280, Loss: 0.02096525952219963\n",
"Step 290, Loss: 0.01923210360109806\n",
"Step 300, Loss: 0.005103807430714369\n",
"Step 310, Loss: 0.00888622086495161\n",
"Epoch 1/1 completed.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import re\n",
"\n",
"# Step 5: Prepare the Data for Fine-Tuning\n",
"# Function to mask if conditions\n",
"def mask_if_condition(code_snippet):\n",
" # Find all if conditions\n",
" if_conditions = re.findall(r'(if\\s+.*?:)', code_snippet)\n",
" masked_snippet = code_snippet\n",
" ground_truth = None\n",
"\n",
" # Mask a single if condition if exists\n",
" if if_conditions:\n",
" ground_truth = if_conditions[0]\n",
" masked_snippet = code_snippet.replace(ground_truth, '<mask>', 1)\n",
"\n",
" return masked_snippet, ground_truth\n",
"\n",
"# Create a new DataFrame with masked if conditions\n",
"fine_tune_df['masked_code'], fine_tune_df['ground_truth'] = zip(*fine_tune_df['source'].apply(mask_if_condition))\n",
"\n",
"# Drop rows without if conditions\n",
"fine_tune_df.dropna(subset=['ground_truth'], inplace=True)\n",
"\n",
"# Define the fine-tuning dataset class\n",
"class MaskedIfDataset(Dataset):\n",
" def __init__(self, tokenizer, dataframe, max_len=512):\n",
" self.tokenizer = tokenizer\n",
" self.data = dataframe\n",
" self.max_len = max_len\n",
"\n",
" def __len__(self):\n",
" return len(self.data)\n",
"\n",
" def __getitem__(self, index):\n",
" masked_code = self.data.iloc[index]['masked_code']\n",
" ground_truth = self.data.iloc[index]['ground_truth']\n",
" inputs = self.tokenizer(masked_code, max_length=self.max_len, padding='max_length', truncation=True, return_tensors=\"pt\")\n",
" labels = self.tokenizer(ground_truth, max_length=self.max_len, padding='max_length', truncation=True, return_tensors=\"pt\").input_ids\n",
" labels[labels == self.tokenizer.pad_token_id] = -100 # Set pad token labels to -100 so they are not included in the loss\n",
"\n",
" return {\n",
" 'input_ids': inputs.input_ids.squeeze(),\n",
" 'attention_mask': inputs.attention_mask.squeeze(),\n",
" 'labels': labels.squeeze()\n",
" }\n",
"\n",
"# Instantiate the fine-tuning dataset\n",
"fine_tune_dataset = MaskedIfDataset(tokenizer, fine_tune_df)\n",
"\n",
"# Create a DataLoader for fine-tuning\n",
"fine_tune_loader = DataLoader(fine_tune_dataset, batch_size=8, shuffle=True)\n",
"\n",
"# Step 6: Fine-Tune the Model\n",
"def fine_tune(model, dataloader, epochs, print_every=10):\n",
" model.train()\n",
" optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)\n",
" global_step = 0 # Initialize a counter for the global training step\n",
"\n",
" for epoch in range(epochs):\n",
" for batch in dataloader:\n",
" optimizer.zero_grad()\n",
" inputs = {\n",
" 'input_ids': batch['input_ids'],\n",
" 'attention_mask': batch['attention_mask'],\n",
" 'labels': batch['labels']\n",
" }\n",
" outputs = model(**inputs)\n",
" loss = outputs.loss\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" if global_step % print_every == 0: # Print every print_every steps\n",
" print(f\"Step {global_step}, Loss: {loss.item()}\")\n",
"\n",
" global_step += 1 # Increment the step counter\n",
"\n",
" print(f\"Epoch {epoch+1}/{epochs} completed.\")\n",
"\n",
"# Fine-tune the model\n",
"fine_tune(model, fine_tune_loader, epochs=1)\n",
"\n",
"# Save the fine-tuned model\n",
"# model.save_pretrained('/content/drive/MyDrive/analytics/if/fine_tuned_model')\n"
],
"metadata": {
"id": "Hg_YxXBlOlA3",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "47818156-466f-4237-9e3b-179501b8e642"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Step 0, Loss: 4.836482524871826\n",
"Step 10, Loss: 2.740732192993164\n",
"Step 20, Loss: 1.690596342086792\n",
"Step 30, Loss: 1.883946180343628\n",
"Step 40, Loss: 1.148565411567688\n",
"Step 50, Loss: 1.1054044961929321\n",
"Step 60, Loss: 1.2848252058029175\n",
"Step 70, Loss: 1.6655901670455933\n",
"Step 80, Loss: 0.8521689772605896\n",
"Step 90, Loss: 0.6937440633773804\n",
"Epoch 1/1 completed.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Importing the necessary libraries for this task\n",
"import random\n",
"\n",
"# Set a random seed for reproducibility\n",
"random.seed(42)\n",
"\n",
"# Define a function to get model predictions\n",
"def get_predictions(model, tokenizer, code_snippet):\n",
" inputs = tokenizer(code_snippet, max_length=512, padding='max_length', truncation=True, return_tensors=\"pt\")\n",
" input_ids = inputs.input_ids\n",
" attention_mask = inputs.attention_mask\n",
"\n",
" # Generate predictions\n",
" with torch.no_grad():\n",
" outputs = model.generate(input_ids, attention_mask=attention_mask, max_length=512, num_return_sequences=1)\n",
"\n",
" # Decode the generated output\n",
" decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
"\n",
" return decoded_output\n",
"\n",
"# Print if statements, masked versions, ground truth, and model predictions for 5 items\n",
"for i, row in fine_tune_df.sample(5).iterrows():\n",
" code_snippet = row['source']\n",
" masked_code = row['masked_code']\n",
" ground_truth = row['ground_truth']\n",
"\n",
" # Get model predictions\n",
" model_prediction = get_predictions(model, tokenizer, masked_code)\n",
"\n",
" print(f\"Item {i+1}:\")\n",
" print(\"Original Code:\")\n",
" print(code_snippet)\n",
" print(\"\\nMasked Code:\")\n",
" print(masked_code)\n",
" print(\"\\nGround Truth:\")\n",
" print(ground_truth)\n",
" print(\"\\nModel Prediction:\")\n",
" print(model_prediction)\n",
" print(\"\\n\" + \"=\"*50 + \"\\n\")\n"
],
"metadata": {
"id": "ACB6vG-3Th5U",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "1f10ed2d-9ca2-4e55-cbf8-e93e3d942d04"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Item 4680:\n",
"Original Code:\n",
"def build_slim_model(cfg, slim_cfg, mode='train'):\n",
" with open(slim_cfg) as f:\n",
" slim_load_cfg = yaml.load(f, Loader=yaml.Loader)\n",
" if mode != 'train' and slim_load_cfg['slim'] == 'Distill':\n",
" return cfg\n",
" if slim_load_cfg['slim'] == 'Distill':\n",
" if 'slim_method' in slim_load_cfg and slim_load_cfg['slim_method'] == 'FGD':\n",
" model = FGDDistillModel(cfg, slim_cfg)\n",
" elif 'slim_method' in slim_load_cfg and slim_load_cfg['slim_method'] == 'LD':\n",
" model = LDDistillModel(cfg, slim_cfg)\n",
" elif 'slim_method' in slim_load_cfg and slim_load_cfg['slim_method'] == 'CWD':\n",
" model = CWDDistillModel(cfg, slim_cfg)\n",
" elif 'slim_method' in slim_load_cfg and slim_load_cfg['slim_method'] == 'PPYOLOEDistill':\n",
" model = PPYOLOEDistillModel(cfg, slim_cfg)\n",
" else:\n",
" model = DistillModel(cfg, slim_cfg)\n",
" cfg['model'] = model\n",
" cfg['slim_type'] = cfg.slim\n",
" elif slim_load_cfg['slim'] == 'OFA':\n",
" load_config(slim_cfg)\n",
" model = create(cfg.architecture)\n",
" load_pretrain_weight(model, cfg.weights)\n",
" slim = create(cfg.slim)\n",
" cfg['slim'] = slim\n",
" cfg['model'] = slim(model, model.state_dict())\n",
" cfg['slim_type'] = cfg.slim\n",
" elif slim_load_cfg['slim'] == 'DistillPrune':\n",
" if mode == 'train':\n",
" model = DistillModel(cfg, slim_cfg)\n",
" pruner = create(cfg.pruner)\n",
" pruner(model.student_model)\n",
" else:\n",
" model = create(cfg.architecture)\n",
" weights = cfg.weights\n",
" load_config(slim_cfg)\n",
" pruner = create(cfg.pruner)\n",
" model = pruner(model)\n",
" load_pretrain_weight(model, weights)\n",
" cfg['model'] = model\n",
" cfg['slim_type'] = cfg.slim\n",
" elif slim_load_cfg['slim'] == 'PTQ':\n",
" model = create(cfg.architecture)\n",
" load_config(slim_cfg)\n",
" load_pretrain_weight(model, cfg.weights)\n",
" slim = create(cfg.slim)\n",
" cfg['slim_type'] = cfg.slim\n",
" cfg['slim'] = slim\n",
" cfg['model'] = slim(model)\n",
" elif slim_load_cfg['slim'] == 'UnstructuredPruner':\n",
" load_config(slim_cfg)\n",
" slim = create(cfg.slim)\n",
" cfg['slim_type'] = cfg.slim\n",
" cfg['slim'] = slim\n",
" cfg['unstructured_prune'] = True\n",
" else:\n",
" load_config(slim_cfg)\n",
" model = create(cfg.architecture)\n",
" if mode == 'train':\n",
" load_pretrain_weight(model, cfg.pretrain_weights)\n",
" slim = create(cfg.slim)\n",
" cfg['slim_type'] = cfg.slim\n",
" if mode == 'test' and 'QAT' in slim_load_cfg['slim']:\n",
" slim.quant_config['activation_preprocess_type'] = None\n",
" cfg['model'] = slim(model)\n",
" cfg['slim'] = slim\n",
" if mode != 'train':\n",
" load_pretrain_weight(cfg['model'], cfg.weights)\n",
" return cfg\n",
"\n",
"Masked Code:\n",
"def build_slim_model(cfg, slim_cfg, mode='train'):\n",
" with open(slim_cfg) as f:\n",
" slim_load_cfg = yaml.load(f, Loader=yaml.Loader)\n",
" <mask>\n",
" return cfg\n",
" if slim_load_cfg['slim'] == 'Distill':\n",
" if 'slim_method' in slim_load_cfg and slim_load_cfg['slim_method'] == 'FGD':\n",
" model = FGDDistillModel(cfg, slim_cfg)\n",
" elif 'slim_method' in slim_load_cfg and slim_load_cfg['slim_method'] == 'LD':\n",
" model = LDDistillModel(cfg, slim_cfg)\n",
" elif 'slim_method' in slim_load_cfg and slim_load_cfg['slim_method'] == 'CWD':\n",
" model = CWDDistillModel(cfg, slim_cfg)\n",
" elif 'slim_method' in slim_load_cfg and slim_load_cfg['slim_method'] == 'PPYOLOEDistill':\n",
" model = PPYOLOEDistillModel(cfg, slim_cfg)\n",
" else:\n",
" model = DistillModel(cfg, slim_cfg)\n",
" cfg['model'] = model\n",
" cfg['slim_type'] = cfg.slim\n",
" elif slim_load_cfg['slim'] == 'OFA':\n",
" load_config(slim_cfg)\n",
" model = create(cfg.architecture)\n",
" load_pretrain_weight(model, cfg.weights)\n",
" slim = create(cfg.slim)\n",
" cfg['slim'] = slim\n",
" cfg['model'] = slim(model, model.state_dict())\n",
" cfg['slim_type'] = cfg.slim\n",
" elif slim_load_cfg['slim'] == 'DistillPrune':\n",
" if mode == 'train':\n",
" model = DistillModel(cfg, slim_cfg)\n",
" pruner = create(cfg.pruner)\n",
" pruner(model.student_model)\n",
" else:\n",
" model = create(cfg.architecture)\n",
" weights = cfg.weights\n",
" load_config(slim_cfg)\n",
" pruner = create(cfg.pruner)\n",
" model = pruner(model)\n",
" load_pretrain_weight(model, weights)\n",
" cfg['model'] = model\n",
" cfg['slim_type'] = cfg.slim\n",
" elif slim_load_cfg['slim'] == 'PTQ':\n",
" model = create(cfg.architecture)\n",
" load_config(slim_cfg)\n",
" load_pretrain_weight(model, cfg.weights)\n",
" slim = create(cfg.slim)\n",
" cfg['slim_type'] = cfg.slim\n",
" cfg['slim'] = slim\n",
" cfg['model'] = slim(model)\n",
" elif slim_load_cfg['slim'] == 'UnstructuredPruner':\n",
" load_config(slim_cfg)\n",
" slim = create(cfg.slim)\n",
" cfg['slim_type'] = cfg.slim\n",
" cfg['slim'] = slim\n",
" cfg['unstructured_prune'] = True\n",
" else:\n",
" load_config(slim_cfg)\n",
" model = create(cfg.architecture)\n",
" if mode == 'train':\n",
" load_pretrain_weight(model, cfg.pretrain_weights)\n",
" slim = create(cfg.slim)\n",
" cfg['slim_type'] = cfg.slim\n",
" if mode == 'test' and 'QAT' in slim_load_cfg['slim']:\n",
" slim.quant_config['activation_preprocess_type'] = None\n",
" cfg['model'] = slim(model)\n",
" cfg['slim'] = slim\n",
" if mode != 'train':\n",
" load_pretrain_weight(cfg['model'], cfg.weights)\n",
" return cfg\n",
"\n",
"Ground Truth:\n",
"if mode != 'train' and slim_load_cfg['slim'] == 'Distill':\n",
"\n",
"Model Prediction:\n",
"if mode == 'train':\n",
"\n",
"==================================================\n",
"\n",
"Item 1927:\n",
"Original Code:\n",
"def SignApk(data, keyname, pw, platform_api_level, codename_to_api_level_map, is_compressed, apk_name):\n",
" unsigned = tempfile.NamedTemporaryFile(suffix='_' + apk_name)\n",
" unsigned.write(data)\n",
" unsigned.flush()\n",
" if is_compressed:\n",
" uncompressed = tempfile.NamedTemporaryFile()\n",
" with gzip.open(unsigned.name, 'rb') as in_file, open(uncompressed.name, 'wb') as out_file:\n",
" shutil.copyfileobj(in_file, out_file)\n",
" unsigned.close()\n",
" unsigned = uncompressed\n",
" signed = tempfile.NamedTemporaryFile(suffix='_' + apk_name)\n",
" min_api_level = None\n",
" if platform_api_level > 23:\n",
" min_api_level = None\n",
" else:\n",
" min_api_level = 1\n",
" common.SignFile(unsigned.name, signed.name, keyname, pw, min_api_level=min_api_level, codename_to_api_level_map=codename_to_api_level_map)\n",
" data = None\n",
" if is_compressed:\n",
" compressed = tempfile.NamedTemporaryFile()\n",
" with open(signed.name, 'rb') as in_file, gzip.open(compressed.name, 'wb') as out_file:\n",
" shutil.copyfileobj(in_file, out_file)\n",
" data = compressed.read()\n",
" compressed.close()\n",
" else:\n",
" data = signed.read()\n",
" unsigned.close()\n",
" signed.close()\n",
" return data\n",
"\n",
"Masked Code:\n",
"def SignApk(data, keyname, pw, platform_api_level, codename_to_api_level_map, is_compressed, apk_name):\n",
" unsigned = tempfile.NamedTemporaryFile(suffix='_' + apk_name)\n",
" unsigned.write(data)\n",
" unsigned.flush()\n",
" <mask>\n",
" uncompressed = tempfile.NamedTemporaryFile()\n",
" with gzip.open(unsigned.name, 'rb') as in_file, open(uncompressed.name, 'wb') as out_file:\n",
" shutil.copyfileobj(in_file, out_file)\n",
" unsigned.close()\n",
" unsigned = uncompressed\n",
" signed = tempfile.NamedTemporaryFile(suffix='_' + apk_name)\n",
" min_api_level = None\n",
" if platform_api_level > 23:\n",
" min_api_level = None\n",
" else:\n",
" min_api_level = 1\n",
" common.SignFile(unsigned.name, signed.name, keyname, pw, min_api_level=min_api_level, codename_to_api_level_map=codename_to_api_level_map)\n",
" data = None\n",
" if is_compressed:\n",
" compressed = tempfile.NamedTemporaryFile()\n",
" with open(signed.name, 'rb') as in_file, gzip.open(compressed.name, 'wb') as out_file:\n",
" shutil.copyfileobj(in_file, out_file)\n",
" data = compressed.read()\n",
" compressed.close()\n",
" else:\n",
" data = signed.read()\n",
" unsigned.close()\n",
" signed.close()\n",
" return data\n",
"\n",
"Ground Truth:\n",
"if is_compressed:\n",
"\n",
"Model Prediction:\n",
"if is_compressed:\n",
"\n",
"==================================================\n",
"\n",
"Item 4417:\n",
"Original Code:\n",
"def apply(self, sample, context=None):\n",
" if random.random() < self.prob:\n",
" _resize = self._format_size(random.choice(self.resizes))\n",
" _cropsize = self._format_size(random.choice(self.cropsizes))\n",
" sample = self._resize(self.resizer, sample, size=_resize, mode=self.mode, context=context)\n",
" sample = self._random_crop(self.croper, sample, size=_cropsize, context=context)\n",
" return sample\n",
"\n",
"Masked Code:\n",
"def apply(self, sample, context=None):\n",
" <mask>\n",
" _resize = self._format_size(random.choice(self.resizes))\n",
" _cropsize = self._format_size(random.choice(self.cropsizes))\n",
" sample = self._resize(self.resizer, sample, size=_resize, mode=self.mode, context=context)\n",
" sample = self._random_crop(self.croper, sample, size=_cropsize, context=context)\n",
" return sample\n",
"\n",
"Ground Truth:\n",
"if random.random() < self.prob:\n",
"\n",
"Model Prediction:\n",
"if self.crop_mode == self.crop_mode):\n",
"\n",
"==================================================\n",
"\n",
"Item 2332:\n",
"Original Code:\n",
"def _read_dtbo_header(self, buf):\n",
" \"\"\"Reads DTBO file header into metadata buffer.\n",
"\n",
" Unpack and read the DTBO table header from given buffer. The\n",
" buffer size must exactly be equal to _DT_TABLE_HEADER_SIZE.\n",
"\n",
" Args:\n",
" buf: Bytebuffer read directly from the file of size\n",
" _DT_TABLE_HEADER_SIZE.\n",
" \"\"\"\n",
" self.magic, self.total_size, self.header_size, self.dt_entry_size, self.dt_entry_count, self.dt_entries_offset, self.page_size, self.version = struct.unpack_from('>8I', buf, 0)\n",
" if self.magic != self._DTBO_MAGIC and self.magic != self._ACPIO_MAGIC:\n",
" raise ValueError('Invalid magic number 0x%x in DTBO/ACPIO file' % self.magic)\n",
" if self.header_size != self._DT_TABLE_HEADER_SIZE:\n",
" raise ValueError('Invalid header size (%d) in DTBO/ACPIO file' % self.header_size)\n",
" if self.dt_entry_size != self._DT_ENTRY_HEADER_SIZE:\n",
" raise ValueError('Invalid DT entry header size (%d) in DTBO/ACPIO file' % self.dt_entry_size)\n",
"\n",
"Masked Code:\n",
"def _read_dtbo_header(self, buf):\n",
" \"\"\"Reads DTBO file header into metadata buffer.\n",
"\n",
" Unpack and read the DTBO table header from given buffer. The\n",
" buffer size must exactly be equal to _DT_TABLE_HEADER_SIZE.\n",
"\n",
" Args:\n",
" buf: Bytebuffer read directly from the file of size\n",
" _DT_TABLE_HEADER_SIZE.\n",
" \"\"\"\n",
" self.magic, self.total_size, self.header_size, self.dt_entry_size, self.dt_entry_count, self.dt_entries_offset, self.page_size, self.version = struct.unpack_from('>8I', buf, 0)\n",
" <mask>\n",
" raise ValueError('Invalid magic number 0x%x in DTBO/ACPIO file' % self.magic)\n",
" if self.header_size != self._DT_TABLE_HEADER_SIZE:\n",
" raise ValueError('Invalid header size (%d) in DTBO/ACPIO file' % self.header_size)\n",
" if self.dt_entry_size != self._DT_ENTRY_HEADER_SIZE:\n",
" raise ValueError('Invalid DT entry header size (%d) in DTBO/ACPIO file' % self.dt_entry_size)\n",
"\n",
"Ground Truth:\n",
"if self.magic != self._DTBO_MAGIC and self.magic != self._ACPIO_MAGIC:\n",
"\n",
"Model Prediction:\n",
"if self.magic not in self.header_size:\n",
"\n",
"==================================================\n",
"\n",
"Item 2324:\n",
"Original Code:\n",
"@patch_info.setter\n",
"def patch_info(self, info):\n",
" if info:\n",
" assert self.style == 'diff'\n",
" self._patch_info = info\n",
"\n",
"Masked Code:\n",
"@patch_info.setter\n",
"def patch_info(self, info):\n",
" <mask>\n",
" assert self.style == 'diff'\n",
" self._patch_info = info\n",
"\n",
"Ground Truth:\n",
"if info:\n",
"\n",
"Model Prediction:\n",
"if self.style == 'diff'\n",
"\n",
"==================================================\n",
"\n"
]
}
]
}
]
}