/*------------------------------------------------------------
 Code for generating a Word Cloud Visualization

 Version 1.2 (30 March 2015)
 Author: Michael Goldwasser

 This implementation is based heavily on one originally provided by
 Ira Greenberg, Dianna Xu, and Deepak Kumar in
 Processing: Creative Coding and Generative Art in Processing 2

 However, my implementation is done in a purely procedural style.
 ------------------------------------------------------------*/

// configuration globals
int canvasSize = 800;
String inputFileName = "peterpan.txt";
int N = 150;            // maximum number of words to display
float SCALE = 1.50;     // used to alter overall render scale

// For convenience, we define our own composite data type
class Entry {
  String word;
  int freq;        // number of occurrences
  float size;      // font size
  float x;         // x-coordinate of center
  float y;         // y-coordinate of baseline
  float w;         // width of rendered word
  float ascent;    // height of rendered word above baseline
  float descent;   // height of rendered word below baseline
  color c;         // color for rendered word
}

// list of unique words in the input text; only the first
// numEntries slots are in use (the array grows on demand)
Entry[] lexicon = new Entry[100000];
int numEntries = 0;

// one-time initialization: configure the canvas and font, then
// load, rank, and draw the words
void setup() {
  size(canvasSize, canvasSize);
  background(255);
  // pick better looking font
  textFont(createFont("Times New Roman", 120));
  // x is the horizontal center of a word and y is its baseline;
  // intersects() relies on this convention
  textAlign(CENTER, BASELINE);
  loadWords();
  orderWords();
  renderWords();
}

// load the words from the input file, removing stopwords
// and computing frequencies
void loadWords() {
  // let's begin by loading stop words to ignore
  String[] stopWords = loadStrings("stopwords.txt");
  if (stopWords == null) {
    // loadStrings() returns null when the file cannot be read;
    // continue without filtering rather than crash in contains()
    println("Warning: could not load stopwords.txt; no words will be filtered");
    stopWords = new String[0];
  }

  String delimiters = " ,./?<>;:\"[{]}\\|=+-_()*&^%$#@!~";
  String[] lines = loadStrings(inputFileName);
  if (lines == null) {
    // nothing to count; the lexicon stays empty and
    // renderWords() copes with that case
    println("Error: could not load " + inputFileName);
    return;
  }

  String rawText = join(lines, " ").toLowerCase();
  String[] tokens = splitTokens(rawText, delimiters);
  for (int j=0; j < tokens.length; j++) {
    if (!contains(stopWords, tokens[j])) {
      int k = find(tokens[j]);
      if (k == -1) {
        // first occurrence of this word: append a new entry,
        // growing the array first if every slot is already used
        if (numEntries == lexicon.length) {
          lexicon = (Entry[]) expand(lexicon);
        }
        lexicon[numEntries] = new Entry();
        lexicon[numEntries].word = tokens[j];
        lexicon[numEntries].freq = 1;
        numEntries++;
      } else {
        lexicon[k].freq++;
      }
    }
  }
}

// return the index at which given word can be found
// within entry list, or -1 if it's not found.
int find(String word) {
  for (int j=0; j < numEntries; j++) {
    if (word.equals(lexicon[j].word)) {
      return j;
    }
  }
  return -1;
}

// determine whether given word is in list of strings.
boolean contains(String[] list, String word) {
  for (int j=0; j < list.length; j++) {
    if (word.equals(list[j])) {
      return true;
    }
  }
  return false;
}

// select the N most frequently used words and place them
// in the first N spots of the lexicon array (in descending
// order of frequency)
void orderWords() {
  int limit = min(N, numEntries);
  for (int j=0; j < limit; j++) {
    // determine most frequent word in lexicon[j..numEntries-1]
    // and swap it to lexicon[j] (a partial selection sort)
    int big = j;
    for (int k=j+1; k < numEntries; k++) {
      if (lexicon[k].freq > lexicon[big].freq) {
        big = k;
      }
    }
    if (big != j) {
      Entry temp = lexicon[j];
      lexicon[j] = lexicon[big];
      lexicon[big] = temp;
    }
  }
}

// render the most frequent N words
void renderWords() {
  background(255);
  int limit = min(N, numEntries);
  if (limit <= 0) {
    // no words available (empty or missing input); without this
    // guard lexicon[limit-1] would index -1 and MIN_SIZE would
    // divide by zero
    return;
  }

  // the least frequent displayed word gets MIN_SIZE; every other
  // word is scaled in proportion to its frequency
  float smallFreq = lexicon[limit-1].freq;
  float MIN_SIZE = SCALE * canvasSize / limit;   // minimum font size to use
  for (int j=0; j < limit; j++) {
    setWordProperties(j, MIN_SIZE * lexicon[j].freq / smallFreq);
    placeWordSpiral(j);
    renderWord(j);
  }
}

// record the font size, measured dimensions, and a random
// dark color for lexicon[j]
void setWordProperties(int j, float size) {
  // textSize must be set first: the width/ascent/descent
  // measurements below depend on the current text size
  textSize(size);
  lexicon[j].size = size;
  lexicon[j].w = textWidth(lexicon[j].word);
  lexicon[j].ascent = textAscent();
  lexicon[j].descent = textDescent();
  // each channel is kept below 127 so words stay dark
  // against the white background
  lexicon[j].c = color(random(127), random(127), random(127));
}

// place lexicon[j] at a uniformly random position on the canvas
void placeWordRandom(int j) {
  lexicon[j].x = random(0, width);
  lexicon[j].y = random(0, height);
}

// repeatedly pick random positions for lexicon[j] until it does
// not overlap any earlier word
// NOTE: there is no bound on the number of attempts, so this loops
// for as long as no clear position is drawn
void placeWordRandomNoIntersect(int j) {
  do {
    placeWordRandom(j);
  }
  while (!clear(j));
}

// place lexicon[j] at the first position along an outward spiral
// from the canvas center that is clear of all earlier words
void placeWordSpiral(int j) {
  // 2.0 avoids integer division truncating the center
  // when the canvas dimension is odd
  float cx = width/2.0, cy = height/2.0;
  float R = 0.0, dR = 0.2, theta = 0.0, dTheta = 0.5;
  do {
    // find the next x, y for word j in spiral
    lexicon[j].x = cx + R*cos(theta);
    lexicon[j].y = cy + R*sin(theta);
    theta += dTheta;
    R += dR;
  }
  // until the word is clear of all other words
  while (!clear(j));
}

// return true if word j placement is clear of word 0..j-1
boolean clear(int j) {
  for (int k=0; k < j; k++) {
    if (intersects(j, k)) {
      return false;
    }
  }
  return true;
}

// determine if rendering for lexicon[j] and lexicon[k] intersect
// (that is, if bounding boxes intersect)
boolean intersects(int j, int k) {
  // the first word's bounding box
  float left1 = lexicon[j].x - 0.5 * lexicon[j].w;
  float right1 = lexicon[j].x + 0.5 * lexicon[j].w;
  float top1 = lexicon[j].y - lexicon[j].ascent;
  float bot1 = lexicon[j].y + lexicon[j].descent;

  // the second word's bounding box
  float left2 = lexicon[k].x - 0.5 * lexicon[k].w;
  float right2 = lexicon[k].x + 0.5 * lexicon[k].w;
  float top2 = lexicon[k].y - lexicon[k].ascent;
  float bot2 = lexicon[k].y + lexicon[k].descent;

  // boxes intersect unless we find separating boundary
  return !(right1 < left2 || left1 > right2 || bot1 < top2 || top1 > bot2);
}

// draw the word to the screen with given properties
void renderWord(int j) {
  textSize(lexicon[j].size);
  fill(lexicon[j].c);
  text(lexicon[j].word, lexicon[j].x, lexicon[j].y);

  // debugging to display bounding box
  //noFill();
  //rect(lexicon[j].x-0.5*lexicon[j].w, lexicon[j].y - lexicon[j].ascent,
  //     lexicon[j].w, lexicon[j].ascent + lexicon[j].descent);
}

// add (very) basic user controls

// intentionally empty: defined so the sketch keeps running and
// the mouse/key handlers below are invoked
void draw() {
}

void mouseClicked() {
  renderWords();   // re-render with new colors
}

// pressing 's' saves the current canvas as an image file
void keyPressed() {
  if (key == 's') {
    save("cloud.jpg");
  }
}