From ae7884c3548310b0e27119e56d412a9615c9e935 Mon Sep 17 00:00:00 2001 From: Ambrose Chua Date: Fri, 23 Mar 2018 23:45:09 +0800 Subject: [PATCH] Output boxplot-able data --- .gitignore | 2 +- README.md | 2 +- boxplot.go | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ country.go | 8 ++++--- poster.svg | 68 +++++++++++++++++++++++++++++++++++----------------- 5 files changed, 123 insertions(+), 27 deletions(-) create mode 100644 boxplot.go diff --git a/.gitignore b/.gitignore index 16d56e1..c353bf2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ scan*.bin scan*.json latency.json country.json -country.csv +boxplot.csv diff --git a/README.md b/README.md index 6d95f18..791fbbf 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ go run country.go -in latency.json -db iptocountry -out country.json I will start with a simple plot of latency to every country from Singapore. ``` -go run json2csv.go -in country.json -out country.csv +go run boxplot.go -in country.json -out boxplot.csv ``` ## diff --git a/boxplot.go b/boxplot.go new file mode 100644 index 0000000..d745545 --- /dev/null +++ b/boxplot.go @@ -0,0 +1,70 @@ +package main + +import ( + "encoding/csv" + "encoding/json" + "flag" + "gonum.org/v1/gonum/stat" + "io/ioutil" + "log" + "os" + "sort" + "strconv" +) + +var flagIn = flag.String("in", "", "input file") +var flagOut = flag.String("out", "boxplot.csv", "output file") + +func main() { + flag.Parse() + in := *flagIn + out := *flagOut + + data := make(map[string][]float64, 0) + + log.Println("Reading file " + in) + raw, err := ioutil.ReadFile(in) + if err != nil { + panic(err) + } + + log.Println("Parsing file " + in) + err = json.Unmarshal(raw, &data) + if err != nil { + panic(err) + } + + log.Println("File " + in + " read successfully") + + log.Println("Computing params") + odata := make([][]string, 0) + for cc, ttls := range data { + sort.Float64s(ttls) + min := stat.Quantile(0.00, stat.Empirical, ttls, nil) + q1 := stat.Quantile(0.25, 
stat.Empirical, ttls, nil) + median := stat.Quantile(0.50, stat.Empirical, ttls, nil) + q3 := stat.Quantile(0.75, stat.Empirical, ttls, nil) + max := stat.Quantile(1.00, stat.Empirical, ttls, nil) + odata = append(odata, []string{ + cc, + strconv.Itoa(len(ttls)), + strconv.FormatFloat(min, 'f', -1, 64), + strconv.FormatFloat(q1, 'f', -1, 64), + strconv.FormatFloat(median, 'f', -1, 64), + strconv.FormatFloat(q3, 'f', -1, 64), + strconv.FormatFloat(max, 'f', -1, 64), + }) + } + + log.Println("Creating file " + out) + outFile, err := os.Create(out) + if err != nil { + panic(err) + } + log.Println("Encoding csv") + w := csv.NewWriter(outFile) + err = w.WriteAll(odata) + if err != nil { + panic(err) + } +} diff --git a/country.go b/country.go index 3ccb350..7ff1a66 100644 --- a/country.go +++ b/country.go @@ -105,6 +105,10 @@ func main() { odata := make(map[string][]int, 0) j := 0 for ip, ttl := range data { + if rand.Intn(100) == 0 { + continue + } + for geo[j].End <= ip { j += 1 } @@ -115,9 +119,7 @@ func main() { } cc := geo[j].CC - if rand.Intn(100) == 0 { - odata[cc] = append(odata[cc], ttl) - } + odata[cc] = append(odata[cc], ttl) } total := 0 diff --git a/poster.svg b/poster.svg index 65bc610..20bb830 100644 --- a/poster.svg +++ b/poster.svg @@ -247,17 +247,17 @@ borderopacity="1.0" inkscape:pageopacity="0" inkscape:pageshadow="2" - inkscape:zoom="0.64" - inkscape:cx="1853.3374" - inkscape:cy="3388.8728" + inkscape:zoom="0.22627417" + inkscape:cx="1775.0712" + inkscape:cy="1687.1479" inkscape:document-units="mm" inkscape:current-layer="layer1" showgrid="false" - inkscape:window-width="1432" - inkscape:window-height="855" - inkscape:window-x="4" + inkscape:window-width="1440" + inkscape:window-height="856" + inkscape:window-x="0" inkscape:window-y="0" - inkscape:window-maximized="0" + inkscape:window-maximized="1" showguides="true" inkscape:guide-bbox="true"> A good overview of the data is the following chart of the top 5 countries with the lowest latency:For the 
purposes of this poster, we will compare the latencies from Singapore to South Korea and the latencies from Singapore to China. From that data, we can perform a 2-sample t-test on the latency data to USA and the UK. From that data, we can perform a 2-sample t-test on the latency data to South Korea and China.