資料查詢和統計都試過後,接下來來產生統計圖表看看。
預計是使用node.js+elasticsearch來處理,最後會產出個圖像檔案出來。

準備資料

這次想拿單日特定關鍵字的出現次數來畫個treemap來看看,不知為何一直對這圖表很有興趣。

先列出目前想得到的關鍵字,寫在一個json檔案裡:

{
    "groups": {
        "genius": [
            "天才",
            "かしこい"
        ],
        "helpful": [
            "助かる",
            "たすかる"
        ],
        "tete": [
            "てぇてぇ"
        ],
        "nice": [
            "うまい",
            "ナイス",
            "ないす",
            "いいね",
            "上手い"
        ],
        "flag": [
            "フラグ",
            "flag"
        ],
        "comeback": [
            "おかえり"
        ],
        "great": [
            "えらい"
        ],
        "likes": [
            "すき",
            "スキ",
            "好き"
        ],
        "discord": [
            "不仲"
        ],
        "poop": [
            "うんち",
            "💩"
        ],
        "fire": [
            "炎上",
            "🔥"
        ],
        "pity": [
            "不憫"
        ],
        "cute": [
            "カワイイ",
            "可愛",
            "かわいい"
        ],
        "know": [
            "わかる"
        ],
        "sexy": [
            "センシティブ",
            "エッチ"
        ]
    }
}

同義詞的部分還沒實際試,簡單看了一下,感覺要重建index所以就有些遲疑,要不然這部分可以弄得更簡單些。

然後寫個getQuery方法來產生API需要的query

/**
 * Build the Elasticsearch search body for one day of chat messages.
 *
 * The body restricts hits to the given day (+08:00), requires at least one
 * keyword phrase to match, and adds a `filters` aggregation with one named
 * filter per entry of `KEYWORDS.groups`, each broken down by channel.
 *
 * @param {string} dd - Day to query; interpolated in front of `T00:00:00` / `T23:59:59`.
 * @returns {Object} Search body with `size`, `query` and `aggs`.
 */
function getQuery(dd) {
    // Every keyword is matched as an exact phrase against the `content` field.
    const toPhrase = (keyword) => ({ "match_phrase": { "content": keyword } });

    const dayRange = {
        "range": {
            "post_time": {
                "time_zone": "+08:00",
                "gte": `${dd}T00:00:00`,
                "lte": `${dd}T23:59:59`
            }
        }
    };

    const groupFilters = {};
    const should = [];

    for (const groupName of Object.keys(KEYWORDS.groups)) {
        const keywords = KEYWORDS.groups[groupName];

        // A group with several spellings becomes an OR of phrases;
        // otherwise the group's first entry is used as a single phrase.
        groupFilters[groupName] = keywords.length > 1
            ? { "bool": { "should": keywords.map(toPhrase) } }
            : toPhrase(keywords[0]);

        // The top-level query ORs every keyword of every group.
        for (const keyword of keywords) {
            should.push(toPhrase(keyword));
        }
    }

    const aggs = {
        "count": {
            "filters": { "filters": groupFilters },
            "aggs": {
                "channel": { "terms": { "field": "channel_id.keyword", "size": 200 } }
            }
        }
    };

    return {
        "size": 0,
        "query": { "bool": { "must": [dayRange], should, "minimum_should_match": 1 } },
        aggs
    };
}

接著寫個doSearch方法,輸入時間字串後,去呼叫elasticsearch search API取得結果並處理,回傳加工完的階層式物件:

/**
 * Run the keyword query for one day and reshape the aggregation result into
 * the nested `{ name, children }` structure consumed by the treemap.
 *
 * @param {string} dd - Date string; its first 7 characters (with `-` removed)
 *     select the index (`n7i-chats-<yyyymm>`), and the whole string is passed
 *     to `getQuery`.
 * @returns {Promise<Object>} Resolves with `{ name: "livechat", children }`,
 *     where each child is `{ name, children }` for one keyword group and each
 *     grandchild is `{ name, value, category }` for one channel bucket.
 *     Rejects when the HTTP request fails, when the response carries no
 *     `aggregations.count.buckets`, or when reshaping throws.
 */
function doSearch(dd) {
    return new Promise((resolve, reject) => {
        const index_dd = dd.slice(0, 7).replace(/-/g, "");
        const url = `http://10.0.0.19:9200/n7i-chats-${index_dd}/_search`;
        const json = getQuery(dd);

        // Groups that are queried but deliberately left out of the chart.
        const disabledGroups = new Set(["discord"]);

        console.log(`dd: ${dd} , index_dd: ${index_dd}`);

        rq.post(url, { json }, (err, res) => {
            // Transport-level failure: without this the promise would never settle.
            if (err) {
                reject(err);
                return;
            }

            try {
                const data = res && res.body;
                const buckets = data
                    && data.aggregations
                    && data.aggregations.count
                    && data.aggregations.count.buckets;

                // An error body (e.g. a missing index) has no aggregations at all.
                if (!buckets) {
                    throw new Error(`Unexpected Elasticsearch response from ${url}: ${JSON.stringify(data)}`);
                }

                const output = {
                    "name": "livechat",
                    "children": []
                };

                Object.keys(buckets).forEach((key) => {
                    if (disabledGroups.has(key)) {
                        return;
                    }

                    const children = buckets[key].channel.buckets.map((channel) => ({
                        name: getChannelNameFromID(channel.key),
                        value: channel.doc_count,
                        category: key
                    }));

                    output.children.push({ name: LABELS[key], children });
                });

                resolve(output);
            } catch (e) {
                // Exceptions thrown inside this callback would otherwise escape the promise.
                reject(e);
            }
        });
    });
}

回傳的資料結構大概是:

{
    "name": "livechat",
    "children": [
        {
            "name": "helpful",
            "value": 2434,
            "category": "helpful",
            "children": [
                {
                    "name": "UCoztvTULBYd3WmStqYeoHcA",
                    "value": 3939
                }
            ]
        }
        ...
        ...
        ...
    ]
}

到這邊資料算是準備好了。

繪製圖表

接著參考這裡的範例,寫個drawTreeMap方法:

/**
 * Draw the keyword hierarchy as an SVG treemap and return the markup.
 *
 * Relies on names defined outside this function: `d3`, `main` (the node the
 * SVG is appended to), `context` (passed through to `getTextSetting` for text
 * measurement) and `search_date` (used in the chart title).
 *
 * @param {Object} dataset - Root node `{ name, children }`; leaves carry
 *     `name`, `value` and `category`.
 * @returns {Promise<string>} Resolves with `main.innerHTML` after drawing.
 *     Any exception thrown while drawing rejects the promise.
 */
function drawTreeMap(dataset) {
    return new Promise((resolve) => {
        const width = 1280 * 2;
        const height = 720 * 2;
        const fontFamily = "MotoyaLMaru";
        const baseFontSize = 20;

        // Fill colour per keyword group; categories not listed fall back to white.
        const categoryColors = new Map([
            ["nice", "#ffcc5c"],
            ["likes", "#fe9c8f"],
            ["know", "#03396c"],
            ["cute", "#f37736"],
            ["helpful", "#35a79c"],
            ["genius", "#4d648d"],
            ["great", "#8b9dc3"],
            ["comeback", "#76b4bd"],
            ["flag", "#d11141"],
            ["tete", "#ff8b94"],
            ["fire", "#d62d20"],
            ["poop", "#3c2f2f"],
            ["sexy", "#ff6f69"],
            ["pity", "#7f8e9e"]
        ]);

        const hierarchy = d3.hierarchy(dataset)
            .sum(d => d.value)
            .sort((a, b) => b.value - a.value);

        const treemap = d3.treemap()
            .size([width - 10, height])
            .paddingTop(35)
            .paddingLeft(10)
            .paddingRight(10)
            .paddingBottom(10)
            .paddingInner(5);

        const root = treemap(hierarchy);
        const leaves = root.leaves();

        // Measure each label once instead of once per attribute.
        // NOTE(review): getTextSetting measures in `pt` while the attribute
        // below is written in `px` — confirm this is intended.
        const textSettings = new Map(leaves.map((d) => [
            d,
            getTextSetting(context, d.data.name, fontFamily, baseFontSize, d.x0, d.y0, d.x1 - d.x0, d.y1 - d.y0)
        ]));

        const top = d3.select(main);
        const svg = top.append("svg")
            .attr("width", width)
            .attr("height", height);

        // One rectangle per leaf (channel), coloured by its keyword group.
        svg.selectAll("rect")
            .data(leaves)
            .enter()
            .append("rect")
            .attr("x", d => d.x0)
            .attr("y", d => d.y0)
            .attr("width", d => d.x1 - d.x0)
            .attr("height", d => d.y1 - d.y0)
            .attr("fill", d => categoryColors.get(d.data.category) || "white");

        // Leaf labels; labels that cannot fit get opacity 0 from getTextSetting.
        svg.selectAll("text")
            .data(leaves)
            .enter()
            .append("text")
            .attr("x", d => textSettings.get(d).x)
            .attr("y", d => textSettings.get(d).y)
            .attr("font-size", d => `${textSettings.get(d).size}px`)
            .attr("text-anchor", "middle")
            .style("opacity", d => textSettings.get(d).opacity)
            .attr("fill", "white")
            .style("font-family", fontFamily)
            .text(d => d.data.name);

        // Group titles: "titles" matches no element, so every depth-1 node
        // lands in the enter selection and gets a new <text>.
        svg
            .selectAll("titles")
            .data(root.descendants().filter(d => d.depth === 1))
            .enter()
            .append("text")
            .attr("x", d => d.x0)
            .attr("y", d => d.y0 + 21)
            .text(d => d.data.name)
            .attr("font-size", "20px")
            .style("font-family", fontFamily)
            .attr("fill", "black");

        // Chart title.
        svg
            .append("text")
            .attr("x", 0)
            .attr("y", 25)
            .text(`${search_date} livechat keyword treemap`)
            .attr("font-size", "30px")
            .attr("fill", "black");

        resolve(main.innerHTML);
    });
}

由於想在每個格子裡面寫下人名,但有些名字太長導致會超過範圍,所以額外再寫個getTextSetting方法來算出合適的座標和字型大小,如果真的寫不下的話則透過設定透明度的方式讓它不顯示:

/**
 * Work out where to place a label inside a cell and how large to draw it.
 *
 * Starting from `font_size`, the size is lowered one unit at a time until the
 * measured text width no longer exceeds `parent_width` or the size reaches 1.
 * If the text still does not fit, `opacity` is 0 so the caller can hide it.
 *
 * @param {Object} context - Object exposing a writable `font` and `measureText(text)`.
 * @param {string} text - Label to measure.
 * @param {string} font - Font family name.
 * @param {number} font_size - Initial size, applied in `pt`.
 * @param {number} parent_x - Left edge of the cell.
 * @param {number} parent_y - Top edge of the cell.
 * @param {number} parent_width - Cell width.
 * @param {number} parent_height - Cell height.
 * @returns {{x: number, y: number, size: number, opacity: number}}
 */
function getTextSetting(context, text, font, font_size, parent_x, parent_y, parent_width, parent_height) {
    // Applies the size to the context and reports whether the text is too wide.
    const overflowsAt = (pt) => {
        context.font = `${pt}pt '${font}'`;
        return context.measureText(text).width > parent_width;
    };

    let size = font_size;
    let overflows = overflowsAt(size);

    while (overflows) {
        size -= 1;
        overflows = overflowsAt(size);

        if (size <= 1) {
            break;
        }
    }

    return {
        x: parent_x + parent_width / 2,
        y: parent_y + (parent_height + 10) / 2,
        size,
        opacity: overflows ? 0 : 1
    };
}

寫個doGeneratePNGFromSVG方法來處理drawTreeMap回傳的資料,產生出Buffer

/**
 * Rasterise SVG markup onto a 1280x720 canvas with a black background and
 * return the encoded image.
 *
 * Relies on `Canvas`, `Canvg` and `preset` being defined outside this function.
 *
 * @param {string} svg_source - SVG markup handed to `Canvg.fromString`.
 * @returns {Promise<*>} Resolves with the value of `canvas.toBuffer()` once
 *     rendering has finished; rejects if setup, rendering or encoding fails.
 */
function doGeneratePNGFromSVG(svg_source) {
    return new Promise((resolve, reject) => {
        const cc = Canvas(1280, 720);

        // Declared locally: the bare assignment leaked a global and throws in strict mode.
        const ctx = cc.getContext('2d');
        ctx.fillStyle = "#000000";
        ctx.fillRect(0, 0, 1280, 720);

        const v = Canvg.fromString(ctx, svg_source, preset);

        // Route both render and toBuffer failures to the caller instead of
        // leaving the promise pending.
        v.render()
            .then(() => cc.toBuffer())
            .then(resolve, reject);
    });
}

最後,將Buffer寫成檔案:

// Render the SVG to a PNG buffer and write it next to the script.
// Failures are reported rather than silently dropped.
doGeneratePNGFromSVG(source)
    .then((buffer) => {
        fs.writeFile(`${search_date}-treemap.png`, buffer, (err) => {
            if (err) {
                console.error(`Failed to write ${search_date}-treemap.png:`, err);
                process.exitCode = 1;
            }
        });
    })
    .catch((err) => {
        console.error("Failed to generate PNG from SVG:", err);
        process.exitCode = 1;
    });

完整的程式碼連結

結果圖:

mrcob

雖然可以調整的部分還蠻多的,像是有些字體可以被放大,還有顏色的部分。不過至少有個樣子了。