working
This commit is contained in:
parent
4cc72067e4
commit
da395105d1
1 changed files with 13 additions and 7 deletions
|
@ -9,7 +9,6 @@ fake = Faker()
|
||||||
|
|
||||||
# pip install faker pymongo
|
# pip install faker pymongo
|
||||||
|
|
||||||
# TODO: figures (fake)
|
|
||||||
# TODO: inject (fake) figures and references in content
|
# TODO: inject (fake) figures and references in content
|
||||||
|
|
||||||
# - article doi or JSON filename can be used as paper id
|
# - article doi or JSON filename can be used as paper id
|
||||||
|
@ -83,16 +82,23 @@ def save_sentence(body: dict, parents: [dict], sentence: str):
|
||||||
target = target["sections"][p["id"]]
|
target = target["sections"][p["id"]]
|
||||||
target["content"] += sentence + " "
|
target["content"] += sentence + " "
|
||||||
|
|
||||||
def transform_section(sec: dict) -> dict:
|
def transform_section(sec: dict, figures: [dict], references: [dict]) -> dict:
|
||||||
|
content = []
|
||||||
|
if random.randint(0, 10) == 0 and len(figures) > 0:
|
||||||
|
content += [{ "label": figures[random.randint(0, len(figures)-1)]["label"] }]
|
||||||
|
if "content" in sec and sec["content"] != "":
|
||||||
|
content += [sec["content"]]
|
||||||
|
if random.randint(0, 10) == 0 and len(references) > 0:
|
||||||
|
content += [{ "reference": random.randint(1, len(references)) }]
|
||||||
|
|
||||||
arr = []
|
arr = []
|
||||||
ks = []
|
ks = []
|
||||||
for k in sec["sections"].keys():
|
for k in sec["sections"].keys():
|
||||||
ks.append(k)
|
ks.append(k)
|
||||||
ks.sort()
|
ks.sort()
|
||||||
for k in ks:
|
for k in ks:
|
||||||
arr.append(transform_section(sec["sections"][k]))
|
arr.append(transform_section(sec["sections"][k], figures, references))
|
||||||
|
content += arr,
|
||||||
content = ([sec["content"]] if "content" in sec and sec["content"] != "" else []) + arr,
|
|
||||||
|
|
||||||
if "title" not in sec:
|
if "title" not in sec:
|
||||||
return content
|
return content
|
||||||
|
@ -156,7 +162,7 @@ def json_to_paper(filename: str, jsonObj: dict) -> dict:
|
||||||
|
|
||||||
figures = []
|
figures = []
|
||||||
for i in range(0, random.randint(3, 15)):
|
for i in range(0, random.randint(3, 15)):
|
||||||
figures.push({
|
figures.append({
|
||||||
"page": random.randint(1, 10),
|
"page": random.randint(1, 10),
|
||||||
"label": "fig" + str(i),
|
"label": "fig" + str(i),
|
||||||
"caption": fake.paragraph(nb_sentences=1),
|
"caption": fake.paragraph(nb_sentences=1),
|
||||||
|
@ -200,7 +206,7 @@ def json_to_paper(filename: str, jsonObj: dict) -> dict:
|
||||||
parents.append({ "id": e["secId"], "title": e["title"] })
|
parents.append({ "id": e["secId"], "title": e["title"] })
|
||||||
save_sentence(body, parents, e["sentence"])
|
save_sentence(body, parents, e["sentence"])
|
||||||
|
|
||||||
paper["content"] = transform_section(body)
|
paper["content"] = transform_section(body, figures, references)
|
||||||
return paper
|
return paper
|
||||||
|
|
||||||
|
|
||||||
|
|
Reference in a new issue