In [31]:
import pandas as pd
import time
from pymongo import MongoClient

In [75]:
mongo_conn_str = "mongodb://localhost:27017"
mongo = MongoClient(mongo_conn_str)
db = mongo["ddm"]

In [33]:
def start_the_time():
    global start_time
    start_time = time.time()
    
def end_the_time():
    print("--- %s seconds ---" % (time.time() - start_time))

### Top 10 journals for numbers of papers

In [80]:
start_the_time()
result = db["papers"].find({ 
    "authors.email": {"$regex": "@usi\.ch"}
}, {
    'title': 1
})
end_the_time()

titles = [doc['title'] for doc in result]
print(titles)

--- 0.0001361370086669922 seconds ---
['Morphometric MRI as a diagnostic biomarker of frontotemporal dementia: A systematic review to determine clinical applicability', 'Low adherence of Swiss children to national dietary guidelines', 'Decomposing broadcast algorithms using abstract MAC layers']


### Most 3 cited authors in 'Strategic info-mediaries'

In [101]:
pipeline = [
    {
        "$match": {
            "publicationDetails.journal": "Vertical e-markets"
        }
    },
    { 
        "$unwind": "$authors" 
    }, 
    { 
        "$lookup": {
            "from": "authors",
            "localField": "authors.authorId",
            "foreignField": "_id",
            "as": "authors"
        }
    },
    {
        "$match": {
            "authors.bio": {
                "$regex": "[Ss]uccess"
            }
        }
    },
    { 
        "$unwind": "$keywords" 
    },
    { 
        "$group": {
            "_id": "$keywords", 
            "referenceCount": { 
                "$sum": 1
            } 
        } 
    },
    {
        "$sort": {
            "referenceCount": -1
        }
    },
    {
        "$limit": 10
    }
]

start_the_time()
result = db["papers"].aggregate(pipeline)
end_the_time()

pd.DataFrame(result)

--- 0.302872896194458 seconds ---


Unnamed: 0,_id,referenceCount
0,Freshwater,12
1,Biodiversity,9
2,Marine,8
3,Climate change,8
4,Ecosystem-based management,7
5,Coastal,6
6,Eutrophication,5
7,Phosphorus,5
8,Policy,5
9,Agriculture,4


### Title

In [None]:
pipeline = [
    {
        "$match": {
            "publicationDetails.journal": "Next-generation users",
        }
    },
    { "$unwind": "$authors" }, 
    { 
        "$group": {
            "_id": "$references.authors.name", 
            "referenceCount": { 
                "$sum": 1
            } 
        } 
    },
    {
        "$sort": {
            "referenceCount": -1
        }
    },
    {
        "$limit": 3
    }
]

start_the_time()
result = db["papers"].aggregate(pipeline)
end_the_time()

pd.DataFrame(result)

In [48]:
new_journal = { 
    'issn': '89012388',
    'name': 'Advanced Topics on Databases',
    'volumes': []
}
start_the_time()
new_journal_id = db["journals"].insert_one(new_journal).inserted_id
end_the_time()

--- 0.0010950565338134766 seconds ---


In [95]:
pipeline = [{
    "$group": {
        "_id":"$publicationDetails.journal",
        "paper_number":{
            "$sum":1
        }
    }
},{
    "$sort":{
        "paper_number":-1
    }
},{
    "$limit":10
}]

start_the_time()
result = db["papers"].aggregate(pipeline)
end_the_time()

pd.DataFrame(result)

--- 0.09116077423095703 seconds ---


Unnamed: 0,_id,paper_number
0,One-to-one content,744
1,Vertical e-markets,515
2,Bricks-and-clicks web-readiness,483
3,Plug-and-play web-readiness,361
4,Back-end partnerships,354
5,Next-generation users,334
6,Distributed mindshare,329
7,Enterprise e-services,281
8,Strategic info-mediaries,276
9,Clicks-and-mortar channels,271


In [132]:
pipeline = [
    { 
        "$unwind": "$content"
    }, {
        "$group": {
            "_id": {
                "journal": "$publicationDetails.journal",
                "sectionTitle": "$content.title"
            }, 
            "sectionCount": {
                "$sum": 1
            }
        }
    }, {
        "$sort": {
            "sectionCount": -1
        }
    }, {
        "$limit":10
    }
]

start_the_time()
result = db["papers"].aggregate(pipeline)
end_the_time()

pd.DataFrame(result).to_json()

--- 1.3057661056518555 seconds ---


'{"_id":{"0":{"journal":"One-to-one content","sectionTitle":"Introduction"},"1":{"journal":"One-to-one content","sectionTitle":"Discussion"},"2":{"journal":"Vertical e-markets","sectionTitle":"Introduction"},"3":{"journal":"One-to-one content","sectionTitle":"Results"},"4":{"journal":"Bricks-and-clicks web-readiness","sectionTitle":"Method details"},"5":{"journal":"Plug-and-play web-readiness","sectionTitle":"Introduction"},"6":{"journal":"Back-end partnerships","sectionTitle":"Introduction"},"7":{"journal":"Next-generation users","sectionTitle":"Introduction"},"8":{"journal":"Plug-and-play web-readiness","sectionTitle":"Discussion"},"9":{"journal":"Next-generation users","sectionTitle":"Results"}},"sectionCount":{"0":630,"1":512,"2":506,"3":503,"4":371,"5":353,"6":350,"7":332,"8":330,"9":322}}'