{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "4abbae95",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import time\n",
    "from pymongo import MongoClient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1d3a065",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "f3196535",
   "metadata": {},
   "outputs": [],
   "source": [
    "mongo_conn_str = \"mongodb://localhost:27017\"\n",
    "mongo = MongoClient(mongo_conn_str)\n",
    "db = mongo[\"ddm\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "264e24b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def start_the_time():\n",
    "    global start_time\n",
    "    start_time = time.time()\n",
    "    \n",
    "def end_the_time():\n",
    "    print(\"--- %s seconds ---\" % (time.time() - start_time))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9becfc2a",
   "metadata": {},
   "source": [
    "### Top 10 journals for numbers of papers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "8e98cd86",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- 0.0001361370086669922 seconds ---\n",
      "['Morphometric MRI as a diagnostic biomarker of frontotemporal dementia: A systematic review to determine clinical applicability', 'Low adherence of Swiss children to national dietary guidelines', 'Decomposing broadcast algorithms using abstract MAC layers']\n"
     ]
    }
   ],
   "source": [
    "start_the_time()\n",
    "result = db[\"papers\"].find({ \n",
    "    \"authors.email\": {\"$regex\": \"@usi\\.ch\"}\n",
    "}, {\n",
    "    'title': 1\n",
    "})\n",
    "end_the_time()\n",
    "\n",
    "titles = [doc['title'] for doc in result]\n",
    "print(titles)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0e9ad5e",
   "metadata": {},
   "source": [
    "### Most 3 cited authors in 'Strategic info-mediaries'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "a8781d01",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- 0.302872896194458 seconds ---\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>referenceCount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Freshwater</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Biodiversity</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Marine</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Climate change</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Ecosystem-based management</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Coastal</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eutrophication</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Phosphorus</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Policy</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Agriculture</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          _id  referenceCount\n",
       "0                  Freshwater              12\n",
       "1                Biodiversity               9\n",
       "2                      Marine               8\n",
       "3              Climate change               8\n",
       "4  Ecosystem-based management               7\n",
       "5                     Coastal               6\n",
       "6              Eutrophication               5\n",
       "7                  Phosphorus               5\n",
       "8                      Policy               5\n",
       "9                 Agriculture               4"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipeline = [\n",
    "    {\n",
    "        \"$match\": {\n",
    "            \"publicationDetails.journal\": \"Vertical e-markets\"\n",
    "        }\n",
    "    },\n",
    "    { \n",
    "        \"$unwind\": \"$authors\" \n",
    "    }, \n",
    "    { \n",
    "        \"$lookup\": {\n",
    "            \"from\": \"authors\",\n",
    "            \"localField\": \"authors.authorId\",\n",
    "            \"foreignField\": \"_id\",\n",
    "            \"as\": \"authors\"\n",
    "        }\n",
    "    },\n",
    "    {\n",
    "        \"$match\": {\n",
    "            \"authors.bio\": {\n",
    "                \"$regex\": \"[Ss]uccess\"\n",
    "            }\n",
    "        }\n",
    "    },\n",
    "    { \n",
    "        \"$unwind\": \"$keywords\" \n",
    "    },\n",
    "    { \n",
    "        \"$group\": {\n",
    "            \"_id\": \"$keywords\", \n",
    "            \"referenceCount\": { \n",
    "                \"$sum\": 1\n",
    "            } \n",
    "        } \n",
    "    },\n",
    "    {\n",
    "        \"$sort\": {\n",
    "            \"referenceCount\": -1\n",
    "        }\n",
    "    },\n",
    "    {\n",
    "        \"$limit\": 10\n",
    "    }\n",
    "]\n",
    "\n",
    "start_the_time()\n",
    "result = db[\"papers\"].aggregate(pipeline)\n",
    "end_the_time()\n",
    "\n",
    "pd.DataFrame(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fc8b56f",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "39ae3826",
   "metadata": {},
   "source": [
    "### Title"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "146fef1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "pipeline = [\n",
    "    {\n",
    "        \"$match\": {\n",
    "            \"publicationDetails.journal\": \"Next-generation users\",\n",
    "        }\n",
    "    },\n",
    "    { \"$unwind\": \"$authors\" }, \n",
    "    { \n",
    "        \"$group\": {\n",
    "            \"_id\": \"$references.authors.name\", \n",
    "            \"referenceCount\": { \n",
    "                \"$sum\": 1\n",
    "            } \n",
    "        } \n",
    "    },\n",
    "    {\n",
    "        \"$sort\": {\n",
    "            \"referenceCount\": -1\n",
    "        }\n",
    "    },\n",
    "    {\n",
    "        \"$limit\": 3\n",
    "    }\n",
    "]\n",
    "\n",
    "start_the_time()\n",
    "result = db[\"papers\"].aggregate(pipeline)\n",
    "end_the_time()\n",
    "\n",
    "pd.DataFrame(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "5c02ad39",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- 0.0010950565338134766 seconds ---\n"
     ]
    }
   ],
   "source": [
    "new_journal = { \n",
    "    'issn': '89012388',\n",
    "    'name': 'Advanced Topics on Databases',\n",
    "    'volumes': []\n",
    "}\n",
    "start_the_time()\n",
    "new_journal_id = db[\"journals\"].insert_one(new_journal).inserted_id\n",
    "end_the_time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "2f128b04",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- 0.09116077423095703 seconds ---\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>paper_number</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>One-to-one content</td>\n",
       "      <td>744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Vertical e-markets</td>\n",
       "      <td>515</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Bricks-and-clicks web-readiness</td>\n",
       "      <td>483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Plug-and-play web-readiness</td>\n",
       "      <td>361</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Back-end partnerships</td>\n",
       "      <td>354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Next-generation users</td>\n",
       "      <td>334</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Distributed mindshare</td>\n",
       "      <td>329</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Enterprise e-services</td>\n",
       "      <td>281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Strategic info-mediaries</td>\n",
       "      <td>276</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Clicks-and-mortar channels</td>\n",
       "      <td>271</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               _id  paper_number\n",
       "0               One-to-one content           744\n",
       "1               Vertical e-markets           515\n",
       "2  Bricks-and-clicks web-readiness           483\n",
       "3      Plug-and-play web-readiness           361\n",
       "4            Back-end partnerships           354\n",
       "5            Next-generation users           334\n",
       "6            Distributed mindshare           329\n",
       "7            Enterprise e-services           281\n",
       "8         Strategic info-mediaries           276\n",
       "9       Clicks-and-mortar channels           271"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipeline = [{\n",
    "    \"$group\": {\n",
    "        \"_id\":\"$publicationDetails.journal\",\n",
    "        \"paper_number\":{\n",
    "            \"$sum\":1\n",
    "        }\n",
    "    }\n",
    "},{\n",
    "    \"$sort\":{\n",
    "        \"paper_number\":-1\n",
    "    }\n",
    "},{\n",
    "    \"$limit\":10\n",
    "}]\n",
    "\n",
    "start_the_time()\n",
    "result = db[\"papers\"].aggregate(pipeline)\n",
    "end_the_time()\n",
    "\n",
    "pd.DataFrame(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "8f12712b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- 1.3057661056518555 seconds ---\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'{\"_id\":{\"0\":{\"journal\":\"One-to-one content\",\"sectionTitle\":\"Introduction\"},\"1\":{\"journal\":\"One-to-one content\",\"sectionTitle\":\"Discussion\"},\"2\":{\"journal\":\"Vertical e-markets\",\"sectionTitle\":\"Introduction\"},\"3\":{\"journal\":\"One-to-one content\",\"sectionTitle\":\"Results\"},\"4\":{\"journal\":\"Bricks-and-clicks web-readiness\",\"sectionTitle\":\"Method details\"},\"5\":{\"journal\":\"Plug-and-play web-readiness\",\"sectionTitle\":\"Introduction\"},\"6\":{\"journal\":\"Back-end partnerships\",\"sectionTitle\":\"Introduction\"},\"7\":{\"journal\":\"Next-generation users\",\"sectionTitle\":\"Introduction\"},\"8\":{\"journal\":\"Plug-and-play web-readiness\",\"sectionTitle\":\"Discussion\"},\"9\":{\"journal\":\"Next-generation users\",\"sectionTitle\":\"Results\"}},\"sectionCount\":{\"0\":630,\"1\":512,\"2\":506,\"3\":503,\"4\":371,\"5\":353,\"6\":350,\"7\":332,\"8\":330,\"9\":322}}'"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipeline = [\n",
    "    { \n",
    "        \"$unwind\": \"$content\"\n",
    "    }, {\n",
    "        \"$group\": {\n",
    "            \"_id\": {\n",
    "                \"journal\": \"$publicationDetails.journal\",\n",
    "                \"sectionTitle\": \"$content.title\"\n",
    "            }, \n",
    "            \"sectionCount\": {\n",
    "                \"$sum\": 1\n",
    "            }\n",
    "        }\n",
    "    }, {\n",
    "        \"$sort\": {\n",
    "            \"sectionCount\": -1\n",
    "        }\n",
    "    }, {\n",
    "        \"$limit\":10\n",
    "    }\n",
    "]\n",
    "\n",
    "start_the_time()\n",
    "result = db[\"papers\"].aggregate(pipeline)\n",
    "end_the_time()\n",
    "\n",
    "pd.DataFrame(result).to_json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ecfd45d9",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}