559 lines
15 KiB
Plaintext
559 lines
15 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 31,
|
||
|
"id": "4abbae95",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"import time\n",
|
||
|
"from pymongo import MongoClient"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "c1d3a065",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 75,
|
||
|
"id": "f3196535",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"mongo_conn_str = \"mongodb://localhost:27017\"\n",
|
||
|
"mongo = MongoClient(mongo_conn_str)\n",
|
||
|
"db = mongo[\"ddm\"]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 33,
|
||
|
"id": "264e24b7",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"def start_the_time():\n",
|
||
|
" global start_time\n",
|
||
|
" start_time = time.time()\n",
|
||
|
" \n",
|
||
|
"def end_the_time():\n",
|
||
|
" print(\"--- %s seconds ---\" % (time.time() - start_time))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "9becfc2a",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Top 10 journals for numbers of papers"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 80,
|
||
|
"id": "8e98cd86",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"--- 0.0001361370086669922 seconds ---\n",
|
||
|
"['Morphometric MRI as a diagnostic biomarker of frontotemporal dementia: A systematic review to determine clinical applicability', 'Low adherence of Swiss children to national dietary guidelines', 'Decomposing broadcast algorithms using abstract MAC layers']\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"start_the_time()\n",
|
||
|
"result = db[\"papers\"].find({ \n",
|
||
|
" \"authors.email\": {\"$regex\": \"@usi\\.ch\"}\n",
|
||
|
"}, {\n",
|
||
|
" 'title': 1\n",
|
||
|
"})\n",
|
||
|
"end_the_time()\n",
|
||
|
"\n",
|
||
|
"titles = [doc['title'] for doc in result]\n",
|
||
|
"print(titles)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "c0e9ad5e",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Most 3 cited authors in 'Strategic info-mediaries'"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 101,
|
||
|
"id": "a8781d01",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"--- 0.302872896194458 seconds ---\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>_id</th>\n",
|
||
|
" <th>referenceCount</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>Freshwater</td>\n",
|
||
|
" <td>12</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>Biodiversity</td>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>Marine</td>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>Climate change</td>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>Ecosystem-based management</td>\n",
|
||
|
" <td>7</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>Coastal</td>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>Eutrophication</td>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>Phosphorus</td>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>Policy</td>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>Agriculture</td>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" _id referenceCount\n",
|
||
|
"0 Freshwater 12\n",
|
||
|
"1 Biodiversity 9\n",
|
||
|
"2 Marine 8\n",
|
||
|
"3 Climate change 8\n",
|
||
|
"4 Ecosystem-based management 7\n",
|
||
|
"5 Coastal 6\n",
|
||
|
"6 Eutrophication 5\n",
|
||
|
"7 Phosphorus 5\n",
|
||
|
"8 Policy 5\n",
|
||
|
"9 Agriculture 4"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 101,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"pipeline = [\n",
|
||
|
" {\n",
|
||
|
" \"$match\": {\n",
|
||
|
" \"publicationDetails.journal\": \"Vertical e-markets\"\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" { \n",
|
||
|
" \"$unwind\": \"$authors\" \n",
|
||
|
" }, \n",
|
||
|
" { \n",
|
||
|
" \"$lookup\": {\n",
|
||
|
" \"from\": \"authors\",\n",
|
||
|
" \"localField\": \"authors.authorId\",\n",
|
||
|
" \"foreignField\": \"_id\",\n",
|
||
|
" \"as\": \"authors\"\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" {\n",
|
||
|
" \"$match\": {\n",
|
||
|
" \"authors.bio\": {\n",
|
||
|
" \"$regex\": \"[Ss]uccess\"\n",
|
||
|
" }\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" { \n",
|
||
|
" \"$unwind\": \"$keywords\" \n",
|
||
|
" },\n",
|
||
|
" { \n",
|
||
|
" \"$group\": {\n",
|
||
|
" \"_id\": \"$keywords\", \n",
|
||
|
" \"referenceCount\": { \n",
|
||
|
" \"$sum\": 1\n",
|
||
|
" } \n",
|
||
|
" } \n",
|
||
|
" },\n",
|
||
|
" {\n",
|
||
|
" \"$sort\": {\n",
|
||
|
" \"referenceCount\": -1\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" {\n",
|
||
|
" \"$limit\": 10\n",
|
||
|
" }\n",
|
||
|
"]\n",
|
||
|
"\n",
|
||
|
"start_the_time()\n",
|
||
|
"result = db[\"papers\"].aggregate(pipeline)\n",
|
||
|
"end_the_time()\n",
|
||
|
"\n",
|
||
|
"pd.DataFrame(result)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "5fc8b56f",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "39ae3826",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Title"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "146fef1e",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"pipeline = [\n",
|
||
|
" {\n",
|
||
|
" \"$match\": {\n",
|
||
|
" \"publicationDetails.journal\": \"Next-generation users\",\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" { \"$unwind\": \"$authors\" }, \n",
|
||
|
" { \n",
|
||
|
" \"$group\": {\n",
|
||
|
" \"_id\": \"$references.authors.name\", \n",
|
||
|
" \"referenceCount\": { \n",
|
||
|
" \"$sum\": 1\n",
|
||
|
" } \n",
|
||
|
" } \n",
|
||
|
" },\n",
|
||
|
" {\n",
|
||
|
" \"$sort\": {\n",
|
||
|
" \"referenceCount\": -1\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" {\n",
|
||
|
" \"$limit\": 3\n",
|
||
|
" }\n",
|
||
|
"]\n",
|
||
|
"\n",
|
||
|
"start_the_time()\n",
|
||
|
"result = db[\"papers\"].aggregate(pipeline)\n",
|
||
|
"end_the_time()\n",
|
||
|
"\n",
|
||
|
"pd.DataFrame(result)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 48,
|
||
|
"id": "5c02ad39",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"--- 0.0010950565338134766 seconds ---\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"new_journal = { \n",
|
||
|
" 'issn': '89012388',\n",
|
||
|
" 'name': 'Advanced Topics on Databases',\n",
|
||
|
" 'volumes': []\n",
|
||
|
"}\n",
|
||
|
"start_the_time()\n",
|
||
|
"new_journal_id = db[\"journals\"].insert_one(new_journal).inserted_id\n",
|
||
|
"end_the_time()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 95,
|
||
|
"id": "2f128b04",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"--- 0.09116077423095703 seconds ---\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>_id</th>\n",
|
||
|
" <th>paper_number</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>One-to-one content</td>\n",
|
||
|
" <td>744</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>Vertical e-markets</td>\n",
|
||
|
" <td>515</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>Bricks-and-clicks web-readiness</td>\n",
|
||
|
" <td>483</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>Plug-and-play web-readiness</td>\n",
|
||
|
" <td>361</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>Back-end partnerships</td>\n",
|
||
|
" <td>354</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>Next-generation users</td>\n",
|
||
|
" <td>334</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>Distributed mindshare</td>\n",
|
||
|
" <td>329</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>Enterprise e-services</td>\n",
|
||
|
" <td>281</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>Strategic info-mediaries</td>\n",
|
||
|
" <td>276</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>Clicks-and-mortar channels</td>\n",
|
||
|
" <td>271</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" _id paper_number\n",
|
||
|
"0 One-to-one content 744\n",
|
||
|
"1 Vertical e-markets 515\n",
|
||
|
"2 Bricks-and-clicks web-readiness 483\n",
|
||
|
"3 Plug-and-play web-readiness 361\n",
|
||
|
"4 Back-end partnerships 354\n",
|
||
|
"5 Next-generation users 334\n",
|
||
|
"6 Distributed mindshare 329\n",
|
||
|
"7 Enterprise e-services 281\n",
|
||
|
"8 Strategic info-mediaries 276\n",
|
||
|
"9 Clicks-and-mortar channels 271"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 95,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"pipeline = [{\n",
|
||
|
" \"$group\": {\n",
|
||
|
" \"_id\":\"$publicationDetails.journal\",\n",
|
||
|
" \"paper_number\":{\n",
|
||
|
" \"$sum\":1\n",
|
||
|
" }\n",
|
||
|
" }\n",
|
||
|
"},{\n",
|
||
|
" \"$sort\":{\n",
|
||
|
" \"paper_number\":-1\n",
|
||
|
" }\n",
|
||
|
"},{\n",
|
||
|
" \"$limit\":10\n",
|
||
|
"}]\n",
|
||
|
"\n",
|
||
|
"start_the_time()\n",
|
||
|
"result = db[\"papers\"].aggregate(pipeline)\n",
|
||
|
"end_the_time()\n",
|
||
|
"\n",
|
||
|
"pd.DataFrame(result)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 132,
|
||
|
"id": "8f12712b",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"--- 1.3057661056518555 seconds ---\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"'{\"_id\":{\"0\":{\"journal\":\"One-to-one content\",\"sectionTitle\":\"Introduction\"},\"1\":{\"journal\":\"One-to-one content\",\"sectionTitle\":\"Discussion\"},\"2\":{\"journal\":\"Vertical e-markets\",\"sectionTitle\":\"Introduction\"},\"3\":{\"journal\":\"One-to-one content\",\"sectionTitle\":\"Results\"},\"4\":{\"journal\":\"Bricks-and-clicks web-readiness\",\"sectionTitle\":\"Method details\"},\"5\":{\"journal\":\"Plug-and-play web-readiness\",\"sectionTitle\":\"Introduction\"},\"6\":{\"journal\":\"Back-end partnerships\",\"sectionTitle\":\"Introduction\"},\"7\":{\"journal\":\"Next-generation users\",\"sectionTitle\":\"Introduction\"},\"8\":{\"journal\":\"Plug-and-play web-readiness\",\"sectionTitle\":\"Discussion\"},\"9\":{\"journal\":\"Next-generation users\",\"sectionTitle\":\"Results\"}},\"sectionCount\":{\"0\":630,\"1\":512,\"2\":506,\"3\":503,\"4\":371,\"5\":353,\"6\":350,\"7\":332,\"8\":330,\"9\":322}}'"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 132,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"pipeline = [\n",
|
||
|
" { \n",
|
||
|
" \"$unwind\": \"$content\"\n",
|
||
|
" }, {\n",
|
||
|
" \"$group\": {\n",
|
||
|
" \"_id\": {\n",
|
||
|
" \"journal\": \"$publicationDetails.journal\",\n",
|
||
|
" \"sectionTitle\": \"$content.title\"\n",
|
||
|
" }, \n",
|
||
|
" \"sectionCount\": {\n",
|
||
|
" \"$sum\": 1\n",
|
||
|
" }\n",
|
||
|
" }\n",
|
||
|
" }, {\n",
|
||
|
" \"$sort\": {\n",
|
||
|
" \"sectionCount\": -1\n",
|
||
|
" }\n",
|
||
|
" }, {\n",
|
||
|
" \"$limit\":10\n",
|
||
|
" }\n",
|
||
|
"]\n",
|
||
|
"\n",
|
||
|
"start_the_time()\n",
|
||
|
"result = db[\"papers\"].aggregate(pipeline)\n",
|
||
|
"end_the_time()\n",
|
||
|
"\n",
|
||
|
"pd.DataFrame(result).to_json()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "ecfd45d9",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.10.8"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|