hw2 (part2): done ex2 dashboard, done 1 and 2 for canvas
This commit is contained in:
parent
4a5a71cb28
commit
94c1ad3321
6 changed files with 9648 additions and 0 deletions
33
Assignment2_part2/convert.sh
Executable file
33
Assignment2_part2/convert.sh
Executable file
|
@ -0,0 +1,33 @@
|
|||
#!/bin/bash
#
# Convert data/restaurants_extended.csv (next to this script) into JSON Lines:
# data/restaurants_extended.jsonl, one restaurant object per line.
#
# Requires: bash (for BASH_SOURCE) and jq.
#
# NOTE: rows are split on every "," and CSV quoting is not honoured. The quoted
# "[lon, lat]" column therefore arrives as two separate fields (8 and 9), and
# any other field containing a comma would shift the columns.

set -eu

# Absolute directory of this script, so it can be run from any working dir.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)

input="$SCRIPT_DIR/data/restaurants_extended.csv"
output="$SCRIPT_DIR/data/restaurants_extended.jsonl"

# In order:
# - Slurp the whole CSV as a single raw string and split it into lines
# - Drop the header row and every empty line (such as the one that follows
#   the final newline), tolerating CRLF line endings
# - Split each row on "," and build one JSON object per row
# - Emit the objects in JSON Lines notation (one compact object per line)
#
# All scalar fields are kept as the strings read from the CSV; only the two
# coordinates are converted to numbers.
jq --slurp --raw-input --compact-output '
  split("\n")
  | .[1:]
  | map(rtrimstr("\r") | select(length > 0) | split(","))
  | .[]
  | {
      "id": .[0],
      "name": .[1],
      "cityRaw": .[2],
      # Field 2 has the form city/country/continent.
      "city": (.[2] | split("/") | .[0]),
      "country": (.[2] | split("/") | .[1]),
      "continent": (.[2] | split("/") | .[2]),
      "location": {
        # Field 8 starts with a quote and an opening bracket, field 9 ends
        # with a closing bracket and a quote; strip them plus any leading
        # whitespace before converting to a number.
        "lon": (.[8] | sub("^\"\\["; "") | sub("^\\s+"; "") | tonumber),
        "lat": (.[9] | sub("\\]\"$"; "") | sub("^\\s+"; "") | tonumber)
      },
      "averageCostForTwo": .[3],
      "aggregateRating": .[4],
      "ratingText": .[5],
      "votes": .[6],
      "date": .[7]
    }
' "$input" > "$output"
|
Can't render this file because it is too large.
|
9499
Assignment2_part2/data/restaurants_extended.jsonl
Normal file
9499
Assignment2_part2/data/restaurants_extended.jsonl
Normal file
File diff suppressed because it is too large
Load diff
5
Assignment2_part2/export.ndjson
Normal file
5
Assignment2_part2/export.ndjson
Normal file
File diff suppressed because one or more lines are too long
77
Assignment2_part2/mappings.json
Normal file
77
Assignment2_part2/mappings.json
Normal file
|
@ -0,0 +1,77 @@
|
|||
{
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "long"
|
||||
},
|
||||
"name": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
},
|
||||
"city": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
},
|
||||
"country": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
},
|
||||
"continent": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
},
|
||||
"cityRaw": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
},
|
||||
"location": {
|
||||
"type": "geo_point"
|
||||
},
|
||||
"averageCostForTwo": {
|
||||
"type": "float"
|
||||
},
|
||||
"aggregateRating": {
|
||||
"type": "float"
|
||||
},
|
||||
"ratingText": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
},
|
||||
"votes": {
|
||||
"type": "float"
|
||||
},
|
||||
"date": {
|
||||
"type": "date",
|
||||
"format": "date_optional_time"
|
||||
}
|
||||
}
|
||||
}
|
34
Assignment2_part2/upload.sh
Executable file
34
Assignment2_part2/upload.sh
Executable file
|
@ -0,0 +1,34 @@
|
|||
#!/bin/bash
#
# (Re)create the Elasticsearch index, install its mappings from mappings.json
# and upload every document of data/restaurants_extended.jsonl, using each
# document's "id" field as the Elasticsearch document id.
#
# Requires: bash, curl, jq.
#
# Environment (all optional):
#   ELASTIC_PASSWORD  password of the "elastic" user
#   UPLOAD_JOBS       maximum number of concurrent uploads (default: 16)

set -euo pipefail

# Absolute directory of this script, so it can be run from any working dir.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)

elastic_dir="$HOME/bin/elasticsearch-8.6.2"
elastic_url="https://localhost:9200"
crt="$elastic_dir/config/certs/http_ca.crt"

input="$SCRIPT_DIR/data/restaurants_extended.jsonl"
mappings="$SCRIPT_DIR/mappings.json"

# NOTE(review): a credential is committed here as the fallback value. Prefer
# exporting ELASTIC_PASSWORD and rotating/removing this default.
password="${ELASTIC_PASSWORD:-GZH*wqNTvQ0WRdrPrpHm}"

index_name="restaurants_extended"
max_jobs="${UPLOAD_JOBS:-16}"

# Options shared by every request: verify TLS against the cluster CA,
# authenticate as "elastic", hide the progress meter but still print errors.
curl_opts=(--silent --show-error --cacert "$crt" -u "elastic:$password")

# Create index. Deleting a previous index is best-effort: it may not exist.
curl "${curl_opts[@]}" -X DELETE "$elastic_url/$index_name" | jq . || true
curl "${curl_opts[@]}" -X PUT "$elastic_url/$index_name" | jq .

# Upload mappings (read relative to the script, not the caller's directory).
curl "${curl_opts[@]}" -X POST \
  --data-binary "@$mappings" "$elastic_url/$index_name/_mappings/" \
  -H "Content-Type: application/json" | jq .

# Upload documents one by one, at most $max_jobs requests in flight at once.
running=0
while IFS= read -r line || [[ -n "$line" ]]; do
  id=$(printf '%s\n' "$line" | jq '.id | tonumber')

  # printf with a quoted argument sends the JSON line byte-for-byte; an
  # unquoted echo would word-split and glob-expand it.
  printf '%s\n' "$line" \
    | curl "${curl_opts[@]}" -X PUT \
        --data-binary @- "$elastic_url/$index_name/_doc/$id" \
        -H "Content-Type: application/json" \
    | jq ._id &

  running=$((running + 1))
  if ((running >= max_jobs)); then
    # Barrier: let the current batch finish before starting the next one.
    wait
    running=0
  fi
done < "$input"

# Do not exit while uploads are still in flight.
wait
|
Reference in a new issue