作者:The Myth
翻译&注释:段青青,李昕
网页原址:
https://www.codeproject.com/articles/693841/making-dashboards-with-dc-js-part-using-crossfil
版权:中国石油大学(华东)可视分析小组
Crossfilter.js是一个JavaScript插件,用于对JavaScript数组进行切片和分组,使数据之间能进行灵活的交互。 因此dc.js
和vega等Web端的可视化工具都采用crossfilter
进行数据交互,它使图表更容易操纵数据,并对过滤后的数据进行刷新。 Crossfilter
网站的例子是dc.js
库灵感的来源。
因为大多数实际的Dashboard代码都在操纵图表的数据,所以一旦理解了crossfilter
的工作原理,实际的绘图就很简单了。本文档将尝试介绍几种不同的场景,这样您就可以在开始使用它的时候避免一些陷阱。
首先从GitHub下载crossfilter.js
文件,并将其包含在您的HTML页面中。对于这些示例,将使用原始的GitHub源代码作为参考。
x
1<script type="text/javascript" src="https://rawgithub.com/NickQiZhu/dc.js/master/web/js/crossfilter.js"></script>
首先需要一些数据。下面的数据是从Crossfilter API文档中提取的。
x
1var data = [
2 {date: "2011-11-14T16:17:54Z", quantity: 2, total: 190, tip: 100, type: "tab"},
3 {date: "2011-11-14T16:20:19Z", quantity: 2, total: 190, tip: 100, type: "tab"},
4 {date: "2011-11-14T16:28:54Z", quantity: 1, total: 300, tip: 200, type: "visa"},
5 {date: "2011-11-14T16:30:43Z", quantity: 2, total: 90, tip: 0, type: "tab"},
6 {date: "2011-11-14T16:48:46Z", quantity: 2, total: 90, tip: 0, type: "tab"},
7 {date: "2011-11-14T16:53:41Z", quantity: 2, total: 90, tip: 0, type: "tab"},
8 {date: "2011-11-14T16:54:06Z", quantity: 1, total: 100, tip: 0, type: "cash"},
9 {date: "2011-11-14T16:58:03Z", quantity: 2, total: 90, tip: 0, type: "tab"},
10 {date: "2011-11-14T17:07:21Z", quantity: 2, total: 90, tip: 0, type: "tab"},
11 {date: "2011-11-14T17:22:59Z", quantity: 2, total: 90, tip: 0, type: "tab"},
12 {date: "2011-11-14T17:25:45Z", quantity: 2, total: 200, tip: 0, type: "cash"},
13 {date: "2011-11-14T17:29:52Z", quantity: 1, total: 200, tip: 100, type: "visa"}
14];
然后创建一个crossfilter
实例。
x
1var ndx = crossfilter(data);
对于第一个示例,我们将使用一个整数列设置一个filter。假设我们想筛选total列为90的数据,需要在total列上设置一个dimension。
x
1var totalDim = ndx.dimension(function(d) { return d.total; });
与d3.js
类似,这里function(d) { return d.total; }
是一个无名函数,参数d表示一条记录,通过返回值的设定得到针对total
列的dimension。
如果想要找到所有的total等于90的数据,可以按以下方式操作:
xxxxxxxxxx
11var total_90 = totalDim.filter(90);
为了查看结果,可以将total_90变量输出到webconsole上。
1print_filter("total_90");
输出内容如下:
xxxxxxxxxx
81"total_90(6) = [
2 {"date":"2011-11-14T17:22:59Z","quantity":2,"total":90,"tip":0,"type":"tab"},
3 {"date":"2011-11-14T17:07:21Z","quantity":2,"total":90,"tip":0,"type":"tab"},
4 {"date":"2011-11-14T16:58:03Z","quantity":2,"total":90,"tip":0,"type":"tab"},
5 {"date":"2011-11-14T16:53:41Z","quantity":2,"total":90,"tip":0,"type":"tab"},
6 {"date":"2011-11-14T16:48:46Z","quantity":2,"total":90,"tip":0,"type":"tab"},
7 {"date":"2011-11-14T16:30:43Z","quantity":2,"total":90,"tip":0,"type":"tab"}
8]"
由于希望能够看到filter是否正常工作,所以创建了一个函数来将数据打印到webconsole中(建议采用chrome浏览器,在调试上既灵活又方便)。
x
/**
 * Print the records behind a crossfilter-related variable to the web console.
 *
 * The argument is the NAME of a variable (a string), not the object itself;
 * the name is evaluated to obtain the object and is also used as the label
 * of the printed output.
 *
 * - If the object exposes `top` (as the dimensions and groups in this article
 *   do), all of its records are fetched with `top(Infinity)`.
 * - If the object exposes `dimension` (as the crossfilter instance does), a
 *   throwaway dimension with a constant key is created and its records are
 *   fetched with `top(Infinity)`.
 * - Anything else (e.g. a plain array) is printed as-is.
 *
 * @param {string} filter - Name of an in-scope variable to print.
 */
function print_filter(filter){
	// NOTE(review): eval is used only to resolve a variable name typed by the
	// developer in the console; never pass untrusted input to this function.
	var f=eval(filter);
	if (typeof(f.top) != "undefined") {f=f.top(Infinity);}
	if (typeof(f.dimension) != "undefined") {f=f.dimension(function(d) { return "";}).top(Infinity);}
	// Put each record on its own tab-indented line for readability.
	console.log(filter+"("+f.length+") = "+JSON.stringify(f).replace("[","[\n\t").replace(/}\,/g,"},\n\t").replace("]","\n]"));
}
filter(90)与filterExact(90)表达的含义一致,以下代码的输出结果与filter(90)完全相同:
1var total_90 = totalDim.filterExact(90);
2print_filter("total_90");
如果需要过滤一个范围,例如从90到100的范围,可以将参数放在方括号里。范围为左闭右开,因此如果需要在过滤器中包含100,需要把上限写为101。这与totalDim.filterRange([90,101])的作用相同。
xxxxxxxxxx
21var total_90_101= totalDim.filter([90,101]);
2print_filter("total_90_101");
结果为:
xxxxxxxxxx
91"total_90_101(7) = [
2 {"date":"2011-11-14T16:54:06Z","quantity":1,"total":100,"tip":0,"type":"cash"},
3 {"date":"2011-11-14T17:22:59Z","quantity":2,"total":90,"tip":0,"type":"tab"},
4 {"date":"2011-11-14T17:07:21Z","quantity":2,"total":90,"tip":0,"type":"tab"},
5 {"date":"2011-11-14T16:58:03Z","quantity":2,"total":90,"tip":0,"type":"tab"},
6 {"date":"2011-11-14T16:53:41Z","quantity":2,"total":90,"tip":0,"type":"tab"},
7 {"date":"2011-11-14T16:48:46Z","quantity":2,"total":90,"tip":0,"type":"tab"},
8 {"date":"2011-11-14T16:30:43Z","quantity":2,"total":90,"tip":0,"type":"tab"}
9]"
通过自定义过滤函数可以获取更灵活的结果。比如只抓取能被3整除的内容,可以使用如下方法。这种简单的尝试可以在chrome浏览器的控制台上直接输入下面两条语句来查看所需数据,方便快捷。
xxxxxxxxxx
21var total_3= totalDim.filter(function(d) { if (d%3===0) {return d;} } );
2print_filter("total_3");
结果如下:
xxxxxxxxxx
91"total_3(7) = [
2 {"date":"2011-11-14T16:28:54Z","quantity":1,"total":300,"tip":200,"type":"visa"},
3 {"date":"2011-11-14T17:22:59Z","quantity":2,"total":90,"tip":0,"type":"tab"},
4 {"date":"2011-11-14T17:07:21Z","quantity":2,"total":90,"tip":0,"type":"tab"},
5 {"date":"2011-11-14T16:58:03Z","quantity":2,"total":90,"tip":0,"type":"tab"},
6 {"date":"2011-11-14T16:53:41Z","quantity":2,"total":90,"tip":0,"type":"tab"},
7 {"date":"2011-11-14T16:48:46Z","quantity":2,"total":90,"tip":0,"type":"tab"},
8 {"date":"2011-11-14T16:30:43Z","quantity":2,"total":90,"tip":0,"type":"tab"}
9]"
对数字进行过滤非常简单。但是如果有数据是空值,crossfilter
将不知道如何处理,因此要确保数据集中没有空值。
如果想找到type列中使用visa的所有条目,首先需要创建一个dimension,然后进行过滤。
xxxxxxxxxx
31var typeDim = ndx.dimension(function(d) {return d.type;});
2var visa_filter = typeDim.filter("visa");
3print_filter("visa_filter");
结果如下:
xxxxxxxxxx
41"visa_filter(2) = [
2 {"date":"2011-11-14T17:29:52Z","quantity":1,"total":200,"tip":100,"type":"visa"},
3 {"date":"2011-11-14T16:28:54Z","quantity":1,"total":300,"tip":200,"type":"visa"}
4]"
如果需要获取值为cash的条目也是同样简单的。
xxxxxxxxxx
21var cash_filter = typeDim.filter("cash");
2print_filter("cash_filter");
结果如下:
xxxxxxxxxx
41"cash_filter(2) = [
2 {"date":"2011-11-14T17:25:45Z","quantity":2,"total":200,"tip":0,"type":"cash"},
3 {"date":"2011-11-14T16:54:06Z","quantity":1,"total":100,"tip":0,"type":"cash"}
4]"
下面考虑用reduceSum
对total列中的cash条目进行求和。这里有一些小问题。 前面我们将type列的dimension设置了过滤器。因此会认为reduceSum
只针对过滤后的数据。但事实并非如此。 如果采用group
函数对过滤后的数据做reduceSum
,它不会考虑当前过滤器,而是返回对应关键字的每种类型的总和。这对于dc.js
来说是有意义的,但不适用于仅想访问cash类型数据的情况。
xxxxxxxxxx
21var total = typeDim.group().reduceSum(function(d) {return d.total;});
2print_filter("total");
如果group函数后面不连接其他的函数,它将对当前dimension下的每个类别进行计数。
以下结果可以看出每种类型的总和都被计算出来:
xxxxxxxxxx
51"total(3) = [
2 {"key":"tab","value":920},
3 {"key":"visa","value":500},
4 {"key":"cash","value":300}
5]"
为了获取cash条目的总和,需要采用crossfilter
中的groupAll
函数,这个函数会考虑当前所有的过滤器。在此基础上做reduceSum
可以得到在当前过滤条件下total
列数据的总和。代码如下:
xxxxxxxxxx
21var cash_total = ndx.groupAll().reduceSum(function(d) {return d.total;}).value()
2console.log("cash_total="+cash_total);
因为考虑到了现有的过滤器,因此得到的结果仅为cash条目的总和:
xxxxxxxxxx
11"cash_total=300"
请注意对比group
和groupAll
两个函数在使用上的区别。
既然crossfilter
对象观察到了所有的过滤器,那么当决定用cash进行过滤时,为什么它没有观察到visa过滤器呢?而且当想采用以下代码同时获取cash和visa数据时:
xxxxxxxxxx
21var cash_and_visa_filter = typeDim.filter(function(d) { if (d ==="visa" || d==="cash") {return d;} });
2print_filter("cash_and_visa_filter");
cash过滤器仍然起作用,因此结果如下:
xxxxxxxxxx
41"cash_and_visa_filter(2) = [
2 {"date":"2011-11-14T17:25:45Z","quantity":2,"total":200,"tip":0,"type":"cash"},
3 {"date":"2011-11-14T16:54:06Z","quantity":1,"total":100,"tip":0,"type":"cash"}
4]"
事实上,当我们想执行这样的操作时,首先要清除已有的过滤器。
每次建立新的过滤器前先清除已有的过滤器是一个良好的使用习惯。
1typeDim.filterAll(); //清除已有的过滤器
2var cash_and_visa_filter = typeDim.filter(function(d) { if (d ==="visa" || d==="cash") {return d;} });
3print_filter("cash_and_visa_filter");
这样就可以得到我们需要的结果:
xxxxxxxxxx
61"cash_and_visa_filter(4) = [
2 {"date":"2011-11-14T17:29:52Z","quantity":1,"total":200,"tip":100,"type":"visa"},
3 {"date":"2011-11-14T16:28:54Z","quantity":1,"total":300,"tip":200,"type":"visa"},
4 {"date":"2011-11-14T17:25:45Z","quantity":2,"total":200,"tip":0,"type":"cash"},
5 {"date":"2011-11-14T16:54:06Z","quantity":1,"total":100,"tip":0,"type":"cash"}
6]"
这样我们就基本了解了crossfilter
的作用。它最主要的作用是在相应的数据列上建立dimension,然后针对这个dimension做group和filter的操作,从而形成数据的交互。
联系人:李昕 邮箱:lix@upc.edu.cn