diff --git a/hw02/convert.py b/hw02/convert.py index 895256e..9acb896 100644 --- a/hw02/convert.py +++ b/hw02/convert.py @@ -9,7 +9,6 @@ fake = Faker() # pip install faker pymongo -# TODO: figures (fake) # TODO: inject (fake) figures and references in content # - article doi or JSON filename can be used as paper id @@ -83,16 +82,23 @@ def save_sentence(body: dict, parents: [dict], sentence: str): target = target["sections"][p["id"]] target["content"] += sentence + " " -def transform_section(sec: dict) -> dict: +def transform_section(sec: dict, figures: [dict], references: [dict]) -> dict: + content = [] + if random.randint(0, 10) == 0 and len(figures) > 0: + content += [{ "label": figures[random.randint(0, len(figures)-1)]["label"] }] + if "content" in sec and sec["content"] != "": + content += [sec["content"]] + if random.randint(0, 10) == 0 and len(references) > 0: + content += [{ "reference": random.randint(1, len(references)) }] + arr = [] ks = [] for k in sec["sections"].keys(): ks.append(k) ks.sort() for k in ks: - arr.append(transform_section(sec["sections"][k])) - - content = ([sec["content"]] if "content" in sec and sec["content"] != "" else []) + arr, + arr.append(transform_section(sec["sections"][k], figures, references)) + content += arr, if "title" not in sec: return content @@ -156,7 +162,7 @@ def json_to_paper(filename: str, jsonObj: dict) -> dict: figures = [] for i in range(0, random.randint(3, 15)): - figures.push({ + figures.append({ "page": random.randint(1, 10), "label": "fig" + str(i), "caption": fake.paragraph(nb_sentences=1), @@ -200,7 +206,7 @@ def json_to_paper(filename: str, jsonObj: dict) -> dict: parents.append({ "id": e["secId"], "title": e["title"] }) save_sentence(body, parents, e["sentence"]) - paper["content"] = transform_section(body) + paper["content"] = transform_section(body, figures, references) return paper