# %%
import time

import pandas as pd
from pymongo import MongoClient

# %%
# Connection to the local MongoDB instance; every query below goes through `db`.
mongo_conn_str = "mongodb://localhost:27017"
mongo = MongoClient(mongo_conn_str)
db = mongo["ddm"]

# %%
# Start mark of the section currently being timed; None until start_the_time() runs.
start_time = None


def start_the_time():
    """Remember the current instant as the start of a timed section.

    The mark is stored in the module-level ``start_time`` and is read back by
    ``end_the_time()``. A monotonic high-resolution clock is used because only
    the difference between two readings matters here.
    """
    global start_time
    start_time = time.perf_counter()


def end_the_time():
    """Print the number of seconds elapsed since the last ``start_the_time()``.

    Raises:
        RuntimeError: if ``start_the_time()`` has not been called yet.
    """
    if start_time is None:
        raise RuntimeError("start_the_time() must be called before end_the_time()")
    print("--- %s seconds ---" % (time.perf_counter() - start_time))

# %% [markdown]
# ### Titles of papers with an author e-mail at usi.ch

# %%
start_the_time()
# The pattern is unanchored, so any e-mail that contains "@usi.ch" matches.
# A raw string keeps the backslash without an invalid-escape warning.
usi_cursor = db["papers"].find(
    {"authors.email": {"$regex": r"@usi\.ch"}},
    {"title": 1},
)
# find() only builds a lazy cursor: documents are fetched while iterating.
# The list must be materialised before the timer stops, otherwise the
# reported time excludes the query itself.
titles = [doc["title"] for doc in usi_cursor]
end_the_time()

print(titles)

# %% [markdown]
# ### Top 10 keywords of 'Vertical e-markets' papers by authors whose bio mentions "success"
"data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idreferenceCount
0Freshwater12
1Biodiversity9
2Marine8
3Climate change8
4Ecosystem-based management7
5Coastal6
6Eutrophication5
7Phosphorus5
8Policy5
9Agriculture4
# %%
# Keywords of "Vertical e-markets" papers, counted once per (matching author,
# keyword) pair; the ten most frequent keywords are kept.
pipeline = [
    {"$match": {"publicationDetails.journal": "Vertical e-markets"}},
    # One document per paper author, so each author is joined on its own.
    {"$unwind": "$authors"},
    # Replace the embedded author by the matching document(s) of the
    # "authors" collection.
    {
        "$lookup": {
            "from": "authors",
            "localField": "authors.authorId",
            "foreignField": "_id",
            "as": "authors",
        }
    },
    {"$match": {"authors.bio": {"$regex": "[Ss]uccess"}}},
    {"$unwind": "$keywords"},
    # NOTE(review): the counter is called referenceCount although it counts
    # keyword occurrences, and a paper with several matching authors
    # contributes once per author. Name and counting kept as they were;
    # confirm that this is the intended metric.
    {"$group": {"_id": "$keywords", "referenceCount": {"$sum": 1}}},
    {"$sort": {"referenceCount": -1}},
    {"$limit": 10},
]

start_the_time()
result = db["papers"].aggregate(pipeline)
end_the_time()

pd.DataFrame(result)

# %% [markdown]
# ### Top 3 most cited authors in 'Next-generation users'

# %%
# Each (reference, referenced author) pair of a paper counts as one citation
# of that author.
# NOTE(review): assumes `references` holds the cited works and each of them
# carries an `authors` list with a `name` field, as the grouping path
# implies - confirm against the collection schema.
pipeline = [
    {"$match": {"publicationDetails.journal": "Next-generation users"}},
    # Unwind the cited works and their authors (not the paper's own authors):
    # grouping on the un-unwound path would group by whole arrays of names
    # instead of by individual author.
    {"$unwind": "$references"},
    {"$unwind": "$references.authors"},
    {"$group": {"_id": "$references.authors.name", "referenceCount": {"$sum": 1}}},
    {"$sort": {"referenceCount": -1}},
    {"$limit": 3},
]

start_the_time()
result = db["papers"].aggregate(pipeline)
end_the_time()

pd.DataFrame(result)

# %% [markdown]
# ### Insert a new journal

# %%
# NOTE(review): insert_one() adds a document on every run, so re-running this
# cell stores the same journal again unless the collection enforces uniqueness.
new_journal = {
    "issn": "89012388",
    "name": "Advanced Topics on Databases",
    "volumes": [],
}
start_the_time()
new_journal_id = db["journals"].insert_one(new_journal).inserted_id
end_the_time()
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idpaper_number
0One-to-one content744
1Vertical e-markets515
2Bricks-and-clicks web-readiness483
3Plug-and-play web-readiness361
4Back-end partnerships354
5Next-generation users334
6Distributed mindshare329
7Enterprise e-services281
8Strategic info-mediaries276
9Clicks-and-mortar channels271
# %%
# Number of papers per journal, ten largest journals first.
count_per_journal = {
    "$group": {
        "_id": "$publicationDetails.journal",
        "paper_number": {"$sum": 1},
    }
}
most_papers_first = {"$sort": {"paper_number": -1}}
keep_ten = {"$limit": 10}
pipeline = [count_per_journal, most_papers_first, keep_ten]

start_the_time()
result = db["papers"].aggregate(pipeline)
end_the_time()

pd.DataFrame(result)
# %%
# Ten most frequent (journal, section title) pairs over all paper sections.
one_doc_per_section = {"$unwind": "$content"}
count_by_journal_and_title = {
    "$group": {
        "_id": {
            "journal": "$publicationDetails.journal",
            "sectionTitle": "$content.title",
        },
        "sectionCount": {"$sum": 1},
    }
}
pipeline = [
    one_doc_per_section,
    count_by_journal_and_title,
    {"$sort": {"sectionCount": -1}},
    {"$limit": 10},
]

start_the_time()
result = db["papers"].aggregate(pipeline)
end_the_time()

# The cell value is the JSON string of the result frame, not the frame itself.
section_counts = pd.DataFrame(result)
section_counts.to_json()

# Notebook metadata: kernel "Python 3 (ipykernel)", Python 3.10.8, nbformat 4.5.