{
"cells": [
{
"cell_type": "code",
"execution_count": 31,
"id": "4abbae95",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import time\n",
"from pymongo import MongoClient"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1d3a065",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 75,
"id": "f3196535",
"metadata": {},
"outputs": [],
"source": [
"mongo_conn_str = \"mongodb://localhost:27017\"\n",
"mongo = MongoClient(mongo_conn_str)\n",
"db = mongo[\"ddm\"]"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "264e24b7",
"metadata": {},
"outputs": [],
"source": [
"def start_the_time():\n",
" global start_time\n",
" start_time = time.time()\n",
" \n",
"def end_the_time():\n",
" print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
{
"cell_type": "markdown",
"id": "9becfc2a",
"metadata": {},
"source": [
"### Top 10 journals for numbers of papers"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "8e98cd86",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- 0.0001361370086669922 seconds ---\n",
"['Morphometric MRI as a diagnostic biomarker of frontotemporal dementia: A systematic review to determine clinical applicability', 'Low adherence of Swiss children to national dietary guidelines', 'Decomposing broadcast algorithms using abstract MAC layers']\n"
]
}
],
"source": [
"start_the_time()\n",
"result = db[\"papers\"].find({ \n",
" \"authors.email\": {\"$regex\": \"@usi\\.ch\"}\n",
"}, {\n",
" 'title': 1\n",
"})\n",
"end_the_time()\n",
"\n",
"titles = [doc['title'] for doc in result]\n",
"print(titles)"
]
},
{
"cell_type": "markdown",
"id": "c0e9ad5e",
"metadata": {},
"source": [
"### Most 3 cited authors in 'Strategic info-mediaries'"
]
},
{
"cell_type": "code",
"execution_count": 101,
"id": "a8781d01",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- 0.302872896194458 seconds ---\n"
]
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" _id | \n",
" referenceCount | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Freshwater | \n",
" 12 | \n",
"
\n",
" \n",
" 1 | \n",
" Biodiversity | \n",
" 9 | \n",
"
\n",
" \n",
" 2 | \n",
" Marine | \n",
" 8 | \n",
"
\n",
" \n",
" 3 | \n",
" Climate change | \n",
" 8 | \n",
"
\n",
" \n",
" 4 | \n",
" Ecosystem-based management | \n",
" 7 | \n",
"
\n",
" \n",
" 5 | \n",
" Coastal | \n",
" 6 | \n",
"
\n",
" \n",
" 6 | \n",
" Eutrophication | \n",
" 5 | \n",
"
\n",
" \n",
" 7 | \n",
" Phosphorus | \n",
" 5 | \n",
"
\n",
" \n",
" 8 | \n",
" Policy | \n",
" 5 | \n",
"
\n",
" \n",
" 9 | \n",
" Agriculture | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" _id referenceCount\n",
"0 Freshwater 12\n",
"1 Biodiversity 9\n",
"2 Marine 8\n",
"3 Climate change 8\n",
"4 Ecosystem-based management 7\n",
"5 Coastal 6\n",
"6 Eutrophication 5\n",
"7 Phosphorus 5\n",
"8 Policy 5\n",
"9 Agriculture 4"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipeline = [\n",
" {\n",
" \"$match\": {\n",
" \"publicationDetails.journal\": \"Vertical e-markets\"\n",
" }\n",
" },\n",
" { \n",
" \"$unwind\": \"$authors\" \n",
" }, \n",
" { \n",
" \"$lookup\": {\n",
" \"from\": \"authors\",\n",
" \"localField\": \"authors.authorId\",\n",
" \"foreignField\": \"_id\",\n",
" \"as\": \"authors\"\n",
" }\n",
" },\n",
" {\n",
" \"$match\": {\n",
" \"authors.bio\": {\n",
" \"$regex\": \"[Ss]uccess\"\n",
" }\n",
" }\n",
" },\n",
" { \n",
" \"$unwind\": \"$keywords\" \n",
" },\n",
" { \n",
" \"$group\": {\n",
" \"_id\": \"$keywords\", \n",
" \"referenceCount\": { \n",
" \"$sum\": 1\n",
" } \n",
" } \n",
" },\n",
" {\n",
" \"$sort\": {\n",
" \"referenceCount\": -1\n",
" }\n",
" },\n",
" {\n",
" \"$limit\": 10\n",
" }\n",
"]\n",
"\n",
"start_the_time()\n",
"result = db[\"papers\"].aggregate(pipeline)\n",
"end_the_time()\n",
"\n",
"pd.DataFrame(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5fc8b56f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "39ae3826",
"metadata": {},
"source": [
"### Title"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "146fef1e",
"metadata": {},
"outputs": [],
"source": [
"pipeline = [\n",
" {\n",
" \"$match\": {\n",
" \"publicationDetails.journal\": \"Next-generation users\",\n",
" }\n",
" },\n",
" { \"$unwind\": \"$authors\" }, \n",
" { \n",
" \"$group\": {\n",
" \"_id\": \"$references.authors.name\", \n",
" \"referenceCount\": { \n",
" \"$sum\": 1\n",
" } \n",
" } \n",
" },\n",
" {\n",
" \"$sort\": {\n",
" \"referenceCount\": -1\n",
" }\n",
" },\n",
" {\n",
" \"$limit\": 3\n",
" }\n",
"]\n",
"\n",
"start_the_time()\n",
"result = db[\"papers\"].aggregate(pipeline)\n",
"end_the_time()\n",
"\n",
"pd.DataFrame(result)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "5c02ad39",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- 0.0010950565338134766 seconds ---\n"
]
}
],
"source": [
"new_journal = { \n",
" 'issn': '89012388',\n",
" 'name': 'Advanced Topics on Databases',\n",
" 'volumes': []\n",
"}\n",
"start_the_time()\n",
"new_journal_id = db[\"journals\"].insert_one(new_journal).inserted_id\n",
"end_the_time()"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "2f128b04",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- 0.09116077423095703 seconds ---\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" _id | \n",
" paper_number | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" One-to-one content | \n",
" 744 | \n",
"
\n",
" \n",
" 1 | \n",
" Vertical e-markets | \n",
" 515 | \n",
"
\n",
" \n",
" 2 | \n",
" Bricks-and-clicks web-readiness | \n",
" 483 | \n",
"
\n",
" \n",
" 3 | \n",
" Plug-and-play web-readiness | \n",
" 361 | \n",
"
\n",
" \n",
" 4 | \n",
" Back-end partnerships | \n",
" 354 | \n",
"
\n",
" \n",
" 5 | \n",
" Next-generation users | \n",
" 334 | \n",
"
\n",
" \n",
" 6 | \n",
" Distributed mindshare | \n",
" 329 | \n",
"
\n",
" \n",
" 7 | \n",
" Enterprise e-services | \n",
" 281 | \n",
"
\n",
" \n",
" 8 | \n",
" Strategic info-mediaries | \n",
" 276 | \n",
"
\n",
" \n",
" 9 | \n",
" Clicks-and-mortar channels | \n",
" 271 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" _id paper_number\n",
"0 One-to-one content 744\n",
"1 Vertical e-markets 515\n",
"2 Bricks-and-clicks web-readiness 483\n",
"3 Plug-and-play web-readiness 361\n",
"4 Back-end partnerships 354\n",
"5 Next-generation users 334\n",
"6 Distributed mindshare 329\n",
"7 Enterprise e-services 281\n",
"8 Strategic info-mediaries 276\n",
"9 Clicks-and-mortar channels 271"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipeline = [{\n",
" \"$group\": {\n",
" \"_id\":\"$publicationDetails.journal\",\n",
" \"paper_number\":{\n",
" \"$sum\":1\n",
" }\n",
" }\n",
"},{\n",
" \"$sort\":{\n",
" \"paper_number\":-1\n",
" }\n",
"},{\n",
" \"$limit\":10\n",
"}]\n",
"\n",
"start_the_time()\n",
"result = db[\"papers\"].aggregate(pipeline)\n",
"end_the_time()\n",
"\n",
"pd.DataFrame(result)"
]
},
{
"cell_type": "code",
"execution_count": 132,
"id": "8f12712b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- 1.3057661056518555 seconds ---\n"
]
},
{
"data": {
"text/plain": [
"'{\"_id\":{\"0\":{\"journal\":\"One-to-one content\",\"sectionTitle\":\"Introduction\"},\"1\":{\"journal\":\"One-to-one content\",\"sectionTitle\":\"Discussion\"},\"2\":{\"journal\":\"Vertical e-markets\",\"sectionTitle\":\"Introduction\"},\"3\":{\"journal\":\"One-to-one content\",\"sectionTitle\":\"Results\"},\"4\":{\"journal\":\"Bricks-and-clicks web-readiness\",\"sectionTitle\":\"Method details\"},\"5\":{\"journal\":\"Plug-and-play web-readiness\",\"sectionTitle\":\"Introduction\"},\"6\":{\"journal\":\"Back-end partnerships\",\"sectionTitle\":\"Introduction\"},\"7\":{\"journal\":\"Next-generation users\",\"sectionTitle\":\"Introduction\"},\"8\":{\"journal\":\"Plug-and-play web-readiness\",\"sectionTitle\":\"Discussion\"},\"9\":{\"journal\":\"Next-generation users\",\"sectionTitle\":\"Results\"}},\"sectionCount\":{\"0\":630,\"1\":512,\"2\":506,\"3\":503,\"4\":371,\"5\":353,\"6\":350,\"7\":332,\"8\":330,\"9\":322}}'"
]
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipeline = [\n",
" { \n",
" \"$unwind\": \"$content\"\n",
" }, {\n",
" \"$group\": {\n",
" \"_id\": {\n",
" \"journal\": \"$publicationDetails.journal\",\n",
" \"sectionTitle\": \"$content.title\"\n",
" }, \n",
" \"sectionCount\": {\n",
" \"$sum\": 1\n",
" }\n",
" }\n",
" }, {\n",
" \"$sort\": {\n",
" \"sectionCount\": -1\n",
" }\n",
" }, {\n",
" \"$limit\":10\n",
" }\n",
"]\n",
"\n",
"start_the_time()\n",
"result = db[\"papers\"].aggregate(pipeline)\n",
"end_the_time()\n",
"\n",
"pd.DataFrame(result).to_json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecfd45d9",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}