This commit is contained in:
Claudio Maggioni 2022-11-20 14:32:18 +01:00
parent 4cc72067e4
commit da395105d1

View file

@ -9,7 +9,6 @@ fake = Faker()
# pip install faker pymongo
# TODO: figures (fake)
# TODO: inject (fake) figures and references in content
# - article DOI or JSON filename can be used as paper ID
@ -83,16 +82,23 @@ def save_sentence(body: dict, parents: [dict], sentence: str):
target = target["sections"][p["id"]]
target["content"] += sentence + " "
def transform_section(sec: dict) -> dict:
def transform_section(sec: dict, figures: [dict], references: [dict]) -> dict:
content = []
if random.randint(0, 10) == 0 and len(figures) > 0:
content += [{ "label": figures[random.randint(0, len(figures)-1)]["label"] }]
if "content" in sec and sec["content"] != "":
content += [sec["content"]]
if random.randint(0, 10) == 0 and len(references) > 0:
content += [{ "reference": random.randint(1, len(references)) }]
arr = []
ks = []
for k in sec["sections"].keys():
ks.append(k)
ks.sort()
for k in ks:
arr.append(transform_section(sec["sections"][k]))
content = ([sec["content"]] if "content" in sec and sec["content"] != "" else []) + arr,
arr.append(transform_section(sec["sections"][k], figures, references))
content += arr,
if "title" not in sec:
return content
@ -156,7 +162,7 @@ def json_to_paper(filename: str, jsonObj: dict) -> dict:
figures = []
for i in range(0, random.randint(3, 15)):
figures.push({
figures.append({
"page": random.randint(1, 10),
"label": "fig" + str(i),
"caption": fake.paragraph(nb_sentences=1),
@ -200,7 +206,7 @@ def json_to_paper(filename: str, jsonObj: dict) -> dict:
parents.append({ "id": e["secId"], "title": e["title"] })
save_sentence(body, parents, e["sentence"])
paper["content"] = transform_section(body)
paper["content"] = transform_section(body, figures, references)
return paper