# How to generate the DBLP CSVs
|
|
|
|
```shell
|
|
# Download the gzip-compressed DBLP XML dump and unpack it to dblp.xml
curl -o dblp.xml.gz https://dblp.org/xml/dblp.xml.gz
gunzip dblp.xml.gz

# download the DTD specification of the DBLP XML format
curl -o dblp.dtd https://dblp.org/xml/dblp.dtd

# Fetch the converter and run it on the dump; output files share the
# dblp_csv prefix (dblp_csv_<type>.csv plus dblp_csv_<type>_header.csv).
git clone https://github.com/ThomHurks/dblp-to-csv

dblp-to-csv/XMLToCSV.py --annotate dblp.xml dblp.dtd dblp_csv.csv \
  --relations journal:article_journal author:article_author

# The target directory has to exist before the redirect and the cp below.
mkdir -p csv-import

for t in article; do
  # Build a single header line from the separate header file:
  #   1. split the ';'-separated header into one field per line,
  #   2. drop everything from the first ':' on each field
  #      (presumably the type suffix added by --annotate; verify against the tool),
  #   3. join the fields back with ';' and let `awk 1` terminate the line.
  # Then prepend that header to the data file and rewrite every \"" sequence
  # (optionally wrapped in braces) to a plain "".
  tr ';' '\n' <"dblp_csv_${t}_header.csv" | sed 's/:.*//g' | \
    tr '\n' ';' | awk 1 | cat - "dblp_csv_${t}.csv" | \
    sed -E 's/\{?\\""\}?/""/g' >"csv-import/${t}.csv"
done

# Brace expansion needs commas; with '|' the shell would treat this as a pipeline.
cp dblp_csv_{author,journal}_* dblp_csv_{author,journal}.csv csv-import
|
|
```
|
|
|
|
# Archive
|
|
|
|
The csv-import files are shipped as a compressed archive (`csv-import.tar.gz`). To extract them, run:
|
|
|
|
```shell
|
|
# Extract (-x) the gzip-compressed (-z) archive, listing each file as it is unpacked (-v)
tar -x -z -v -f csv-import.tar.gz
|
|
```
|