diff --git a/hw02/.gitignore b/hw02/.gitignore
new file mode 100644
index 0000000..46878bb
--- /dev/null
+++ b/hw02/.gitignore
@@ -0,0 +1,2 @@
+/articles-dataset/*
+!/articles-dataset/.gitkeep
\ No newline at end of file
diff --git a/hw02/README.md b/hw02/README.md
new file mode 100644
index 0000000..11c5db8
--- /dev/null
+++ b/hw02/README.md
@@ -0,0 +1,7 @@
+# DDM HW02 Data Importer
+
+Instructions:
+- Run `pip3 install faker pymongo`
+- Install MongoDB (the connection string is the `mongo_conn_str` variable in `import.py`, which defaults to a local database with no password)
+- Download the ZIP file from https://elsevier.digitalcommonsdata.com/datasets/zm33cdndxs/ and extract all of its JSON files into the `articles-dataset` folder
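+- Run the script with `python3 import.py articles-dataset` (optionally append a positive integer, e.g. `python3 import.py articles-dataset 1000`, to stop after importing that many papers)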
\ No newline at end of file
diff --git a/hw02/articles-dataset/.gitkeep b/hw02/articles-dataset/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/hw02/convert.py b/hw02/import.py
similarity index 92%
rename from hw02/convert.py
rename to hw02/import.py
index 9acb896..3122b06 100644
--- a/hw02/convert.py
+++ b/hw02/import.py
@@ -9,8 +9,6 @@ fake = Faker()
 
 # pip install faker pymongo
 
-# TODO: inject (fake) figures and references in content
-
 # - article doi or JSON filename can be used as paper id
 # - no author id, authors are equal based on name for lack of info
 # - use issn as journal id
@@ -83,14 +81,6 @@ def save_sentence(body: dict, parents: [dict], sentence: str):
     target["content"] += sentence + " "
 
 def transform_section(sec: dict, figures: [dict], references: [dict]) -> dict:
-    content = []
-    if random.randint(0, 10) == 0 and len(figures) > 0:
-        content += [{ "label": figures[random.randint(0, len(figures)-1)]["label"] }]     
-    if "content" in sec and sec["content"] != "":
-        content += [sec["content"]] 
-    if random.randint(0, 10) == 0 and len(references) > 0:
-        content += [{ "reference": random.randint(1, len(references)) }] 
-    
     arr = []
     ks = []
     for k in sec["sections"].keys():
@@ -98,10 +88,24 @@ def transform_section(sec: dict, figures: [dict], references: [dict]) -> dict:
     ks.sort()
     for k in ks:
         arr.append(transform_section(sec["sections"][k], figures, references))
-    content += arr,
-
     if "title" not in sec:
-        return content
+        return arr
+
+    content = []
+    if random.randint(0, 4) == 0 and len(figures) > 0:
+        content += [{ "label": figures[random.randint(0, len(figures)-1)]["label"] }]     
+    if "content" in sec and sec["content"] != "":
+        content += [sec["content"]] 
+    if random.randint(0, 4) == 0 and len(references) > 0:
+        content += [{ "reference": random.randint(1, len(references)) }] 
+
+    content += arr
+
+
+    if len(content) > 0 and isinstance(content[-1], list) and len(content[-1]) == 0:
+        del content[-1]
+
+
 
     return {
         "title": sec["title"],
@@ -213,6 +217,10 @@ def json_to_paper(filename: str, jsonObj: dict) -> dict:
 mongo_conn_str = "mongodb://localhost:27017"
 def main():
     source_folder: str = sys.argv[1]
+    if len(sys.argv) > 2:
+        limit: int = int(sys.argv[2])
+    else:
+        limit: int = -1
 
     mongo = MongoClient(mongo_conn_str)
     db = mongo["ddm"]
@@ -226,6 +234,7 @@ def main():
     journal_ids: dict[str, ID] = {}
 
     i = 0
+    j = 0
     for filename in os.listdir(source_folder):
         if filename.endswith(".json"): 
             jsonObj = {}
@@ -233,6 +242,10 @@ def main():
                 jsonStr = "".join(jsonFile.readlines())
                 d = json.JSONDecoder()
                 jsonObj = d.decode(jsonStr)
+
+            if getProp(jsonObj, "metadata.issn") is None or getProp(jsonObj, "metadata.doi") is None:
+                j += 1
+                continue # SKIP papers with no journal ISSN or paper DOI
                 
             paper = json_to_paper(filename, jsonObj)
                 
@@ -242,9 +255,13 @@ def main():
             i += 1
             if i % 100 == 0:
                 print("Papers processed: ", i)
-            if i == 1000: # TODO: remove
+            if j % 100 == 0 and j > 0:
+                print("Papers skipped: ", j)
+            if limit > 0 and i == limit:
                 break
     
+    print("Papers skipped: ", j)
+
     i = 0
     for name, author in authors.items():
         x = db["authors"].insert_one(author)
@@ -315,9 +332,7 @@ def main():
         db["journals"].update_one(mongo_filter, mongo_update)
         i += 1
         if i % 100 == 0:
-            print("Journals updated with refs: ", i)
-
-        
+            print("Journals updated with refs: ", i)        
 
 if __name__ == "__main__":
     main()
\ No newline at end of file