hw2 (part2): done ex2 dashboard, done 1 and 2 for canvas

This commit is contained in:
Claudio Maggioni 2023-05-03 22:47:01 +02:00
parent 4a5a71cb28
commit 94c1ad3321
6 changed files with 9648 additions and 0 deletions

33
Assignment2_part2/convert.sh Executable file
View file

@ -0,0 +1,33 @@
#!/bin/bash
# Converts data/restaurants_extended.csv (located next to this script) into
# JSON Lines at data/restaurants_extended.jsonl: one restaurant object per line.
#
# bash is required: the script-directory lookup below relies on BASH_SOURCE
# and `&>`, neither of which is available in a plain POSIX sh such as dash.
set -e
# Without pipefail only the last pipeline stage decides the exit status, so a
# failure while parsing the CSV would silently leave an empty output file.
set -o pipefail

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)

input="$SCRIPT_DIR/data/restaurants_extended.csv"
output="$SCRIPT_DIR/data/restaurants_extended.jsonl"

# In order:
# - Convert CSV to JSON: the file is slurped as one raw string, split into
#   lines, and each line is split on "," into positional fields.
# - `.[1:-1]` drops the first line (presumably the CSV header) and the last
#   line (which is all `null`).
# - Convert the JSON array to JSON Lines notation (second jq invocation).
#
# NOTE(review): splitting on "," is not quote-aware. The quoted "[lon, lat]"
# column therefore arrives as two fields (8 and 9), which the `sub` calls clean
# up; any other field containing a comma would shift the columns.
jq -s --raw-input --raw-output \
  'split("\n") | .[1:-1] | map(split(",")) |
  map({
    "id": .[0],
    "name": .[1],
    "cityRaw": .[2],
    "city": .[2] | split("/") | .[0],
    "country": .[2] | split("/") | .[1],
    "continent": .[2] | split("/") | .[2],
    "location": {
      "lon": .[8] | sub("^\"\\["; "") | sub("\\s*"; "") | tonumber,
      "lat": .[9] | sub("\\]\"$"; "") | sub("\\s*"; "") | tonumber
    },
    "averageCostForTwo": .[3],
    "aggregateRating": .[4],
    "ratingText": .[5],
    "votes": .[6],
    "date": .[7]
  })' "$input" \
  | jq -c '.[]' > "$output"

View file

Can't render this file because it is too large.

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,77 @@
{
"properties": {
"id": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"city": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"country": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"continent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"cityRaw": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"location": {
"type": "geo_point"
},
"averageCostForTwo": {
"type": "float"
},
"aggregateRating": {
"type": "float"
},
"ratingText": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"votes": {
"type": "float"
},
"date": {
"type": "date",
"format": "date_optional_time"
}
}
}

34
Assignment2_part2/upload.sh Executable file
View file

@ -0,0 +1,34 @@
#!/bin/bash
# Recreates the Elasticsearch index, installs its field mappings, then indexes
# every line of data/restaurants_extended.jsonl as one document keyed by its
# own `id` field.
#
# Optional environment overrides (defaults are the previously hard-coded values):
#   ELASTIC_DIR       Elasticsearch installation directory (holds the CA cert)
#   ELASTIC_URL       base URL of the cluster
#   ELASTIC_PASSWORD  password of the `elastic` user
set -e
# Let a failing curl abort the script instead of being hidden by `| jq`.
set -o pipefail

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)

elastic_dir="${ELASTIC_DIR:-$HOME/bin/elasticsearch-8.6.2}"
elastic_url="${ELASTIC_URL:-https://localhost:9200}"
crt="$elastic_dir/config/certs/http_ca.crt"
input="$SCRIPT_DIR/data/restaurants_extended.jsonl"
# Resolved against the script directory like `input`, so the script no longer
# depends on the caller's working directory.
# NOTE(review): assumes mappings.json sits next to this script — confirm.
mappings="$SCRIPT_DIR/mappings.json"
# NOTE(review): a credential is committed here as the fallback value; prefer
# exporting ELASTIC_PASSWORD and rotating this password.
password="${ELASTIC_PASSWORD:-GZH*wqNTvQ0WRdrPrpHm}"
index_name="restaurants_extended"
# Upper bound on simultaneous upload requests.
max_jobs=16

# Create index (drop any previous one first; a failure of the delete request
# is deliberately ignored)
curl --cacert "$crt" -u "elastic:$password" \
  -X DELETE "$elastic_url/$index_name" | jq . || true
curl --cacert "$crt" -u "elastic:$password" \
  -X PUT "$elastic_url/$index_name" | jq .

# Upload mappings
curl --cacert "$crt" -u "elastic:$password" -X POST \
  --data-binary @- "$elastic_url/$index_name/_mappings/" \
  -H "Content-Type: application/json" < "$mappings" | jq .

# Upload documents one by one, at most $max_jobs requests in flight at a time
running=0
while IFS= read -r line || [[ -n "$line" ]]; do
  id=$(printf '%s\n' "$line" | jq '.id | tonumber')
  # The line is passed quoted through printf: an unquoted `echo $line` would
  # word-split and glob-expand the JSON text before it is sent.
  # `-k` is intentionally not used: it would disable the certificate check
  # that --cacert sets up, unlike the other requests in this script.
  printf '%s\n' "$line" \
    | curl --cacert "$crt" -u "elastic:$password" -X PUT \
      --data-binary @- "$elastic_url/$index_name/_doc/$id" \
      -H "Content-Type: application/json" \
    | jq ._id &
  running=$((running + 1))
  if [[ "$running" -ge "$max_jobs" ]]; then
    # Barrier: let the current batch finish before starting more requests.
    wait
    running=0
  fi
done < "$input"

# Do not exit while uploads are still running in the background.
wait