Merge pull request #194 from Doha2012/master

add reddit classifier
This commit is contained in:
Eugen 2015-04-16 23:50:20 +03:00
commit fbb574ae02
10 changed files with 8442 additions and 3 deletions

View File

@ -156,6 +156,13 @@
<scope>runtime</scope> <scope>runtime</scope>
</dependency> </dependency>
<!-- apache mahout -->
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-core</artifactId>
<version>0.9</version>
</dependency>
<!-- marshalling --> <!-- marshalling -->

View File

@ -1,14 +1,24 @@
package org.baeldung.config; package org.baeldung.config;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
import org.baeldung.reddit.classifier.RedditClassifier;
import org.baeldung.reddit.util.UserAgentInterceptor;
import org.baeldung.web.schedule.ScheduledTasks; import org.baeldung.web.schedule.ScheduledTasks;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.PropertySource; import org.springframework.context.annotation.PropertySource;
import org.springframework.context.support.PropertySourcesPlaceholderConfigurer; import org.springframework.context.support.PropertySourcesPlaceholderConfigurer;
import org.springframework.core.env.Environment;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.scheduling.annotation.EnableAsync; import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling; import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.security.oauth2.client.OAuth2ClientContext; import org.springframework.security.oauth2.client.OAuth2ClientContext;
@ -36,6 +46,9 @@ import org.springframework.web.servlet.view.InternalResourceViewResolver;
@ComponentScan({ "org.baeldung.web" }) @ComponentScan({ "org.baeldung.web" })
public class WebConfig extends WebMvcConfigurerAdapter { public class WebConfig extends WebMvcConfigurerAdapter {
@Autowired
private Environment env;
@Bean @Bean
public static PropertySourcesPlaceholderConfigurer propertySourcesPlaceholderConfigurer() { public static PropertySourcesPlaceholderConfigurer propertySourcesPlaceholderConfigurer() {
return new PropertySourcesPlaceholderConfigurer(); return new PropertySourcesPlaceholderConfigurer();
@ -63,10 +76,22 @@ public class WebConfig extends WebMvcConfigurerAdapter {
@Bean @Bean
public ScheduledTasks scheduledTasks(OAuth2ProtectedResourceDetails reddit) { public ScheduledTasks scheduledTasks(OAuth2ProtectedResourceDetails reddit) {
final ScheduledTasks s = new ScheduledTasks(); final ScheduledTasks s = new ScheduledTasks();
s.setRedditRestTemplate(new OAuth2RestTemplate(reddit)); final List<ClientHttpRequestInterceptor> list = new ArrayList<ClientHttpRequestInterceptor>();
list.add(new UserAgentInterceptor());
final OAuth2RestTemplate restTemplate = new OAuth2RestTemplate(reddit);
restTemplate.setInterceptors(list);
s.setRedditRestTemplate(restTemplate);
return s; return s;
} }
/**
 * Creates the application's {@link RedditClassifier} and trains it from the
 * {@code train.csv} classpath resource before handing it to the container.
 *
 * NOTE(review): the resource is resolved to a file-system path via
 * {@code getFile()}, so this presumably only works when the resource is
 * available as a plain file (e.g. an exploded deployment) - confirm for
 * packaged deployments.
 *
 * @return the trained classifier
 * @throws IOException if the resource cannot be resolved to a file or cannot be read
 */
@Bean
public RedditClassifier redditClassifier() throws IOException {
    final RedditClassifier trained = new RedditClassifier();
    final String trainingPath = new ClassPathResource("train.csv").getFile().getAbsolutePath();
    trained.trainClassifier(trainingPath);
    return trained;
}
@Override @Override
public void addResourceHandlers(ResourceHandlerRegistry registry) { public void addResourceHandlers(ResourceHandlerRegistry registry) {
registry.addResourceHandler("/resources/**").addResourceLocations("/resources/"); registry.addResourceHandler("/resources/**").addResourceLocations("/resources/");
@ -108,6 +133,9 @@ public class WebConfig extends WebMvcConfigurerAdapter {
@Bean @Bean
public OAuth2RestTemplate redditRestTemplate(OAuth2ClientContext clientContext) { public OAuth2RestTemplate redditRestTemplate(OAuth2ClientContext clientContext) {
final OAuth2RestTemplate template = new OAuth2RestTemplate(reddit(), clientContext); final OAuth2RestTemplate template = new OAuth2RestTemplate(reddit(), clientContext);
final List<ClientHttpRequestInterceptor> list = new ArrayList<ClientHttpRequestInterceptor>();
list.add(new UserAgentInterceptor());
template.setInterceptors(list);
final AccessTokenProvider accessTokenProvider = new AccessTokenProviderChain(Arrays.<AccessTokenProvider> asList(new MyAuthorizationCodeAccessTokenProvider(), new ImplicitAccessTokenProvider(), new ResourceOwnerPasswordAccessTokenProvider(), final AccessTokenProvider accessTokenProvider = new AccessTokenProviderChain(Arrays.<AccessTokenProvider> asList(new MyAuthorizationCodeAccessTokenProvider(), new ImplicitAccessTokenProvider(), new ResourceOwnerPasswordAccessTokenProvider(),
new ClientCredentialsAccessTokenProvider())); new ClientCredentialsAccessTokenProvider()));
template.setAccessTokenProvider(accessTokenProvider); template.setAccessTokenProvider(accessTokenProvider);

View File

@ -0,0 +1,107 @@
package org.baeldung.reddit.classifier;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import org.apache.mahout.classifier.sgd.L2;
import org.apache.mahout.classifier.sgd.OnlineLogisticRegression;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.vectorizer.encoders.AdaptiveWordValueEncoder;
import org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder;
import org.apache.mahout.vectorizer.encoders.StaticWordValueEncoder;
import com.google.common.base.Splitter;
/**
 * Two-category ("good" / "bad") classifier for Reddit posts backed by a Mahout
 * {@link OnlineLogisticRegression} over a 4-element feature vector.
 *
 * Training and evaluation data are read from text files with one post per line
 * in the form {@code category;hour;wordCount;title;domain}. A line is treated as
 * "good" when it starts with {@code good} and as "bad" otherwise.
 */
public class RedditClassifier {

    /** Category index assigned to lines that start with "good". */
    public static final int GOOD = 0;

    /** Category index assigned to every other line. */
    public static final int BAD = 1;

    private final OnlineLogisticRegression classifier;
    private final FeatureVectorEncoder titleEncoder;
    private final FeatureVectorEncoder domainEncoder;

    // Per-category line counters, indexed by GOOD / BAD; they accumulate across calls.
    private final int[] trainCount = { 0, 0 };
    private final int[] evalCount = { 0, 0 };

    /**
     * Builds an untrained model (2 categories, 4 features, L2 prior) together
     * with the encoders used for the title and the domain.
     */
    public RedditClassifier() {
        classifier = new OnlineLogisticRegression(2, 4, new L2(1));
        titleEncoder = new AdaptiveWordValueEncoder("title");
        titleEncoder.setProbes(1);
        domainEncoder = new StaticWordValueEncoder("domain");
        domainEncoder.setProbes(1);
    }

    /**
     * Trains the model with every line of the given file, stopping at end of
     * file or at the first empty line.
     *
     * If the file has no content at all, a {@link RedditDataCollector} run is
     * started to fetch data. Note that this call still trains on nothing in that
     * case, because the already opened reader is at end of file.
     *
     * @param fileName path of the training file
     * @throws IOException if the file cannot be opened or read
     */
    public void trainClassifier(String fileName) throws IOException {
        final BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try {
            String line = reader.readLine();
            if (line == null) {
                new RedditDataCollector().collectData();
            }
            while (isDataLine(line)) {
                final int category = categoryOf(line);
                trainCount[category]++;
                classifier.train(category, convertLineToVector(line));
                line = reader.readLine();
            }
        } finally {
            // Release the file handle even when a malformed line aborts training.
            reader.close();
        }
        System.out.println("Training count ========= " + trainCount[0] + "___" + trainCount[1]);
    }

    /**
     * Returns the index of the most likely category for the given features.
     *
     * @param features feature vector, e.g. one built by {@link #convertPost(String, String, int)}
     * @return {@link #GOOD} or {@link #BAD}
     */
    public int classify(Vector features) {
        return classifier.classifyFull(features).maxValueIndex();
    }

    /**
     * Builds the feature vector for a post that has not been submitted yet.
     *
     * @param title  post title; its word count is the number of non-empty tokens
     *               obtained by splitting on non-word characters
     * @param domain host name of the linked URL
     * @param hour   hour of day stored in slot 2 of the vector
     * @return the 4-element feature vector
     */
    public Vector convertPost(String title, String domain, int hour) {
        final Vector features = new RandomAccessSparseVector(4);
        final int noOfWords = Splitter.onPattern("\\W").omitEmptyStrings().splitToList(title).size();
        titleEncoder.addToVector(title, features);
        domainEncoder.addToVector(domain, features);
        features.set(2, hour);
        features.set(3, noOfWords);
        return features;
    }

    /**
     * Classifies every line of {@link RedditDataCollector#TEST_FILE} and compares
     * the prediction with the label at the start of the line.
     *
     * @return the fraction of correctly classified lines; {@code NaN} when the
     *         file contains no data lines
     * @throws IOException if the test file cannot be opened or read
     */
    public double evaluateClassifier() throws IOException {
        int correct = 0;
        int wrong = 0;
        final BufferedReader reader = new BufferedReader(new FileReader(RedditDataCollector.TEST_FILE));
        try {
            String line = reader.readLine();
            while (isDataLine(line)) {
                final int category = categoryOf(line);
                evalCount[category]++;
                if (category == classify(convertLineToVector(line))) {
                    correct++;
                } else {
                    wrong++;
                }
                line = reader.readLine();
            }
        } finally {
            reader.close();
        }
        System.out.println(correct + " ----- " + wrong);
        System.out.println("Eval count ========= " + evalCount[0] + "___" + evalCount[1]);
        // Adding 0.0 forces floating-point division.
        return correct / (wrong + correct + 0.0);
    }

    // ==== private

    /**
     * Tells whether reading should continue with this line. The emptiness check
     * compares content; the previous identity comparison against "" never
     * matched a line returned by the reader.
     */
    private static boolean isDataLine(String line) {
        return (line != null) && !line.isEmpty();
    }

    /** Maps the label at the start of a data line to its category index. */
    private static int categoryOf(String line) {
        return line.startsWith("good") ? GOOD : BAD;
    }

    /**
     * Converts one {@code category;hour;wordCount;title;domain} line into a
     * feature vector. Lines with fewer than five fields or non-numeric hour or
     * word count fail with a runtime exception.
     */
    private Vector convertLineToVector(String line) {
        final Vector features = new RandomAccessSparseVector(4);
        final String[] items = line.split(";");
        titleEncoder.addToVector(items[3], features);
        domainEncoder.addToVector(items[4], features);
        features.set(2, Integer.parseInt(items[1])); // hour of day
        features.set(3, Integer.parseInt(items[2])); // number of words in the title
        return features;
    }
}

View File

@ -0,0 +1,109 @@
package org.baeldung.reddit.classifier;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.baeldung.reddit.util.UserAgentInterceptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.web.client.RestTemplate;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
/**
 * Downloads the newest posts of a subreddit from Reddit's public JSON listing
 * and writes them as {@code category;hour;wordCount;title;domain} lines into
 * the training and test files used by the classifier.
 *
 * A post is labelled "good" when its score is at least {@code minScore} and
 * "bad" otherwise.
 */
public class RedditDataCollector {

    public static final String TRAINING_FILE = "src/main/resources/train.csv";
    public static final String TEST_FILE = "src/main/resources/test.csv";

    private final Logger logger = LoggerFactory.getLogger(getClass());

    // Cursor of the next listing page; null until the first page was read or when Reddit reports none.
    private String postAfter;
    private final RestTemplate restTemplate;
    private final String subreddit;
    private final int minScore;

    /** Collects from {@code r/all} with a minimum "good" score of 4. */
    public RedditDataCollector() {
        this("all", 4);
    }

    /**
     * @param subreddit name of the subreddit to read, without the {@code r/} prefix
     * @param minScore  lowest score that still counts as "good"
     */
    public RedditDataCollector(String subreddit, int minScore) {
        restTemplate = new RestTemplate();
        final List<ClientHttpRequestInterceptor> list = new ArrayList<ClientHttpRequestInterceptor>();
        list.add(new UserAgentInterceptor());
        restTemplate.setInterceptors(list);
        this.subreddit = subreddit;
        this.minScore = minScore;
    }

    /**
     * Fetches 80 pages of 100 posts into {@link #TRAINING_FILE} and one further
     * page into {@link #TEST_FILE}. Failures are logged, never thrown. Collection
     * of training pages stops early when the current thread is interrupted.
     */
    public void collectData() {
        final int limit = 100;
        final int noOfRounds = 80;
        try {
            final FileWriter writer = new FileWriter(TRAINING_FILE);
            try {
                for (int i = 0; (i < noOfRounds) && !Thread.currentThread().isInterrupted(); i++) {
                    getPosts(limit, writer);
                }
            } finally {
                // Close even if a round fails unexpectedly so the handle is not leaked.
                writer.close();
            }
            final FileWriter testWriter = new FileWriter(TEST_FILE);
            try {
                getPosts(limit, testWriter);
            } finally {
                testWriter.close();
            }
        } catch (final Exception e) {
            logger.error("write to file error", e);
        }
    }

    // ==== private

    /**
     * Requests one listing page (continuing after {@code postAfter} when set),
     * writes its posts, then pauses three seconds before returning.
     */
    private void getPosts(int limit, FileWriter writer) {
        String fullUrl = "http://www.reddit.com/r/" + subreddit + "/new.json?limit=" + limit;
        if (postAfter != null) {
            fullUrl += "&count=" + limit + "&after=" + postAfter;
        }
        try {
            final JsonNode node = restTemplate.getForObject(fullUrl, JsonNode.class);
            parseNode(node, writer);
            Thread.sleep(3000);
        } catch (final InterruptedException e) {
            // Restore the flag so the caller can notice the interruption and stop.
            Thread.currentThread().interrupt();
            logger.warn("interrupted while waiting between requests", e);
        } catch (final Exception e) {
            logger.error("server error", e);
        }
    }

    /**
     * Remembers the cursor of the next page and writes one line per post of the
     * given listing to the writer.
     *
     * @throws IOException if writing fails
     */
    private void parseNode(JsonNode node, FileWriter writer) throws IOException {
        final JsonNode data = node.get("data");
        final JsonNode after = data.get("after");
        // A JSON null would otherwise become the literal text "null" and be sent as a cursor.
        postAfter = ((after == null) || after.isNull()) ? null : after.asText();
        logger.debug("next listing cursor: {}", postAfter);

        final SimpleDateFormat df = new SimpleDateFormat("HH");
        for (final JsonNode child : data.get("children")) {
            final JsonNode post = child.get("data");
            final String category = (post.get("score").asInt() < minScore) ? "bad" : "good";
            final List<String> words = Splitter.onPattern("\\W").omitEmptyStrings().splitToList(post.get("title").asText());
            // created_utc is in seconds; Date expects milliseconds.
            final Date date = new Date(post.get("created_utc").asLong() * 1000);

            final StringBuilder line = new StringBuilder();
            line.append(category).append(';');
            line.append(df.format(date)).append(';');
            line.append(words.size()).append(';').append(Joiner.on(' ').join(words)).append(';');
            line.append(post.get("domain").asText()).append('\n');
            writer.write(line.toString());
        }
    }

    /** Runs a collection with the default subreddit and score threshold. */
    public static void main(String[] args) {
        final RedditDataCollector collector = new RedditDataCollector();
        collector.collectData();
    }
}

View File

@ -0,0 +1,20 @@
package org.baeldung.reddit.util;
import java.io.IOException;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpRequest;
import org.springframework.http.client.ClientHttpRequestExecution;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.http.client.ClientHttpResponse;
/**
 * Request interceptor that adds a fixed {@code User-Agent} header value to
 * every outgoing request before passing it on to the rest of the chain.
 */
public class UserAgentInterceptor implements ClientHttpRequestInterceptor {

    private static final String HEADER_NAME = "User-Agent";
    private static final String HEADER_VALUE = "Schedule with Reddit";

    /**
     * Adds the header and continues the execution.
     *
     * @return the response produced by the remaining execution chain
     * @throws IOException if the execution fails with an I/O error
     */
    @Override
    public ClientHttpResponse intercept(HttpRequest request, byte[] body, ClientHttpRequestExecution execution) throws IOException {
        request.getHeaders().add(HEADER_NAME, HEADER_VALUE);
        return execution.execute(request, body);
    }
}

View File

@ -12,6 +12,7 @@ import org.baeldung.persistence.dao.PostRepository;
import org.baeldung.persistence.dao.UserRepository; import org.baeldung.persistence.dao.UserRepository;
import org.baeldung.persistence.model.Post; import org.baeldung.persistence.model.Post;
import org.baeldung.persistence.model.User; import org.baeldung.persistence.model.User;
import org.baeldung.reddit.classifier.RedditClassifier;
import org.baeldung.reddit.util.RedditApiConstants; import org.baeldung.reddit.util.RedditApiConstants;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -30,6 +31,7 @@ import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.ResponseStatus; import org.springframework.web.bind.annotation.ResponseStatus;
import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.JsonNode;
@ -40,6 +42,7 @@ public class RedditController {
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm"); private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm");
private final SimpleDateFormat dfHour = new SimpleDateFormat("HH");
@Autowired @Autowired
private OAuth2RestTemplate redditRestTemplate; private OAuth2RestTemplate redditRestTemplate;
@ -50,6 +53,9 @@ public class RedditController {
@Autowired @Autowired
private PostRepository postReopsitory; private PostRepository postReopsitory;
@Autowired
private RedditClassifier redditClassifier;
@RequestMapping("/login") @RequestMapping("/login")
public final String redditLogin() { public final String redditLogin() {
final JsonNode node = redditRestTemplate.getForObject("https://oauth.reddit.com/api/v1/me", JsonNode.class); final JsonNode node = redditRestTemplate.getForObject("https://oauth.reddit.com/api/v1/me", JsonNode.class);
@ -122,6 +128,14 @@ public class RedditController {
return "postListView"; return "postListView";
} }
/**
 * Predicts whether a post with the given title and domain, submitted at the
 * current hour of day, is classified as good or bad.
 *
 * @param title  title of the post to be submitted
 * @param domain host name of the link to be submitted
 * @return {@code "{Good Response}"} when the classifier answers
 *         {@link RedditClassifier#GOOD}, {@code "{Bad response}"} otherwise
 */
@RequestMapping(value = "/predicatePostResponse", method = RequestMethod.POST)
@ResponseBody
public final String predicatePostResponse(@RequestParam(value = "title") final String title, @RequestParam(value = "domain") final String domain) {
    // Read the hour from a per-call Calendar instead of a shared SimpleDateFormat,
    // which is not safe to use from concurrently handled requests.
    final int hour = java.util.Calendar.getInstance().get(java.util.Calendar.HOUR_OF_DAY);
    final int result = redditClassifier.classify(redditClassifier.convertPost(title, domain, hour));
    return (result == RedditClassifier.GOOD) ? "{Good Response}" : "{Bad response}";
}
// === post actions // === post actions
@RequestMapping(value = "/deletePost/{id}", method = RequestMethod.DELETE) @RequestMapping(value = "/deletePost/{id}", method = RequestMethod.DELETE)

View File

@ -0,0 +1,100 @@
bad;20;5;The President s Kill List;newyorker.com
bad;20;8;Home stretch for the Ancient History Magazine Kickstarter;kickstarter.com
good;20;20;A constant Burn to disk link under right click menu but there s no optical drive on retina Macbook Pro;i.imgur.com
bad;20;8;Grey s be creeping on the button like;gph.is
good;20;16;Droid Life First look at Moto 360 s Gorgeous and Impossible to Find Monolink Metal Band;droid-life.com
bad;20;23;US Bleeding Hollow lt Unspoken gt has a couple spots open for our core team 3 7 M HM 10 10 H BRF;self.wowguilds
bad;20;30;I drove up to Orlando for nothing I m by the science center What s some stuff I can do for cheap free so I don t waste my day;self.orlando
bad;20;20;As we make computers more intelligent they ll eventually gain consciousness Then evolution would ve led to silicon based life;self.Showerthoughts
bad;20;5;i am jealous and miserable;self.SuicideWatch
bad;20;29;TIL that in addition to the recent gyrocopter landing the last person to land a helicopter on the White House or U S Capital lawn was also a Floridian;reddit.com
good;20;7;When we finally arrived at Comic Con;i.imgur.com
good;20;22;Just got a call that my OBGYN is out on an emergency for the next week I m being induced this Saturday;self.BabyBumps
good;20;10;Scientists discover intense magnetic field close to supermassive black hole;sciencedaily.com
bad;20;2;W ow;fap.to
bad;20;8;Wedding Photographers did you love your wedding photographer;self.WeddingPhotography
bad;20;38;This video reminds me of the hype in this sub for a Fallout 4 in New York Even though the style is a bit WAY off Most of the characters are a bit similar to the Fallout series;youtube.com
good;20;6;Max the animatronic abandoned at Disney;i.imgur.com
bad;20;2;Francia Expansions;self.HistoricalWorldPowers
bad;20;10;When you learn autotools and discover all am in files;youtube.com
good;20;1;Instincts;i.imgur.com
bad;20;6;No more bing rewards for searches;self.beermoney
bad;20;7;Launcher stops working when signing in HELP;self.GTAV
bad;20;15;I m new to the gym amp changing my diet but I m loving it;self.loseit
bad;20;9;Forbes The Chevy Bolt Tesla s Best News Yet;forbes.com
bad;20;12;Forth Rail Bridge obscured by fog OS 720x720 x post r ScottishPhotos;i.imgur.com
bad;20;2;Background score;self.Daredevil
bad;20;4;To MIT Media Lab;bitcoin-gr.org
good;20;3;Havana Street Art;self.cigars
bad;20;4;Gear PRS Custom 24;self.Guitar
bad;20;34;US Calif Co own a home with SO not married for 2 years Now we are breaking up and she only wants to give me my original down pymt instead of buying me out;self.legaladvice
good;20;6;There s been a fur der;imgur.com
good;20;17;ICA Use A Test Website UI Greg Miaskiewicz 0 40 1 5 min gt 95 gt 500;self.HITsWorthTurkingFor
bad;20;8;balloon popping sound in 5 4 3 2;i.imgur.com
good;20;12;My attempt at recreating the Imperial Insignia in the new teaser trailer;imgur.com
bad;20;10;This is the most important thing on the internet today;youtu.be
bad;20;6;A R Kane and Dirty Beaches;self.ambient
bad;20;10;What is a good hot air soldering for a hobbyist;self.AskElectronics
bad;20;8;Falconshield This Is War 4 Freljord COLLAB draggles;np.reddit.com
bad;20;3;Sacramento home appraiser;sacramentovalleyappraisal.com
bad;20;10;FIRST WALK OFF OF THE YEAR UPVOTE PARTY r CHICubs;reddit.com
bad;20;23;Ukrainian notables politicians journalists react to murder of Kalashnikov and Buzhina See their fb posts in link Most popular opinion Putin did it;pravda.com.ua
bad;20;4;Maclock watch for sale;self.MacMiller
bad;20;2;me irl;imgur.com
bad;20;7;Looking to recruit EU Aliance Outland BCH;self.wowraf
bad;20;10;Trying to understand potential of the restaurant food truck industry;self.Entrepreneur
bad;20;3;Democracy Now Bitcoin;bitcoin-gr.org
good;20;4;Got pulled over today;self.motorcycles
bad;20;7;ESPN Reporter Britt McHenry is a shitlord;liveleak.com
bad;20;15;I m looking for places to buy local craft beers by the bottle Any suggestions;self.dayton
bad;20;5;No username in login screen;self.techsupport
good;20;7;Ads for the upcoming election in Alberta;youtube.com
bad;20;11;Michael Savage Hillary Clinton s Looks Alone Could Sink The Campaign;rightwingwatch.org
good;20;4;Another from People Magazine;imgur.com
bad;20;10;ps4 LF2 CE CROTA CP need sword bearer Psn blackskyes;self.Fireteams
bad;20;4;Dealing with Cho mid;self.summonerschool
bad;20;3;C 3P0 Question;self.XWingTMG
bad;20;10;DUNGEON HUNTER 5 HACK AND CHEATS TOOL RESOURCES GENERATOR UTILITY;self.maxgiron
bad;20;11;I m searching for mature anime genre is not too relevant;self.Animesuggest
bad;20;3;Cassie s hair;self.MortalKombat
bad;20;6;Ha Ha Ha Ha 0 02;youtube.com
bad;20;8;Can someone make a simple I hope profile;self.ChromaProfiles
bad;20;5;I can t reinstall SMITE;self.Smite
bad;20;19;TIL That in 1974 a US Army Private stole a helicopter and landed it at the White House Twice;en.wikipedia.org
good;20;2;Rottweiler pup;imgur.com
good;20;4;New stealth Bonnaroo additions;self.bonnaroo
bad;20;5;Any Houston Rockets fans here;self.Eugene
good;20;10;Pictures of Tube Televisions the moment they re Turned Off;imgur.com
bad;20;10;Camping this summer with a 9 5 month old Tips;self.beyondthebump
good;20;15;Toss this into your old 1 4 7 based packs to improve falling block rendering;minecraftforum.net
good;20;13;StreetPass Thank You Bundle takes EXTRA 1 off the new Mii Plaza games;technologytell.com
good;20;11;First Step of Becoming a Real Sissy M m F m;self.gonewildstories
good;20;10;Big BART delays after apparent suicide at Civic Center station;sfgate.com
bad;20;5;tornado sirens in northwest suburbs;self.Minneapolis
good;20;2;Me irl;i.imgur.com
bad;20;13;Kimi Raikkonen hints at Ferrari stay in 2016 if the team wants him;espn.co.uk
bad;20;16;If you had to Explain a game of LoL in one Analogy what would it be;self.leagueoflegends
good;20;6;What is your guilty pleasure song;self.AskReddit
bad;20;14;Suggestion Don t unlock the next unit until you can afford to buy it;self.swarmsim
good;20;1;Metal;imgur.com
bad;20;6;TheFatRat Time Lapse House Electro 2015;soundcloud.com
bad;20;22;Hey Reddit is it true that the silver fillings dentists put in our teeth contains mercury and could be harmful over time;self.AskReddit
good;20;5;request Will Photoshop For Pizza;self.RandomActsOfPizza
bad;20;13;My wife likes to put small things on the big thing shelf fixed;i.imgur.com
bad;20;14;Mi novio es de M xico y quiero practicar con l pero estoy nerviosa;self.SpanishImmersion
bad;20;5;Anybody have experience with PageFair;self.adops
good;20;23;Osoba na snimci kriva je za nekoliko kra a bicikala po Zagrebu pa tako i maznuo jedan mom frendu Ako ga prepoznajete javite;youtube.com
bad;20;9;Grogheads com Brother Against Brother The AAR Part 1;grogheads.com
good;20;29;St Bernadette Soubirous Feast April 16th outside of France One of the many Incorrupt Saints She looked upon the face of the Blessed Virgin Mary 18 times at Lourdes;en.lourdes-france.org
bad;20;18;My mom texted asking what food I want in the fridge including my preferred flavor of Greek yogurt;i.imgur.com
good;20;18;Spoilers All If Dragon Age II had a subtitle s like Origins and Inquisition what would it be;self.dragonage
bad;20;5;Best hidden places on campus;self.IndianaUniversity
bad;20;8;Rematch Snack Santa Snackta is wonderful and generous;redditgifts.com
bad;20;13;Half Of Yemen s Population Is Going Hungry As Violence Worsens UN Says;huffingtonpost.com
good;20;6;RBC Heritage Round 1 Live Thread;self.dfsports
bad;20;7;Feeling full all the time swollen liver;self.stopdrinking
bad;20;9;Recent events for MLP Zero Hard treasure trail completed;self.TrueLionhearts
bad;20;5;PS4 LF2 for NF Weekly;self.Fireteams
bad;20;5;Stardust with Ocular by yitaku;soundcloud.com
bad;20;4;PC M9 Bayonet Slaughter;self.GlobalOffensiveTrade
good;20;2;Spare food;self.tampa
1 bad 20 5 The President s Kill List newyorker.com
2 bad 20 8 Home stretch for the Ancient History Magazine Kickstarter kickstarter.com
3 good 20 20 A constant Burn to disk link under right click menu but there s no optical drive on retina Macbook Pro i.imgur.com
4 bad 20 8 Grey s be creeping on the button like gph.is
5 good 20 16 Droid Life First look at Moto 360 s Gorgeous and Impossible to Find Monolink Metal Band droid-life.com
6 bad 20 23 US Bleeding Hollow lt Unspoken gt has a couple spots open for our core team 3 7 M HM 10 10 H BRF self.wowguilds
7 bad 20 30 I drove up to Orlando for nothing I m by the science center What s some stuff I can do for cheap free so I don t waste my day self.orlando
8 bad 20 20 As we make computers more intelligent they ll eventually gain consciousness Then evolution would ve led to silicon based life self.Showerthoughts
9 bad 20 5 i am jealous and miserable self.SuicideWatch
10 bad 20 29 TIL that in addition to the recent gyrocopter landing the last person to land a helicopter on the White House or U S Capital lawn was also a Floridian reddit.com
11 good 20 7 When we finally arrived at Comic Con i.imgur.com
12 good 20 22 Just got a call that my OBGYN is out on an emergency for the next week I m being induced this Saturday self.BabyBumps
13 good 20 10 Scientists discover intense magnetic field close to supermassive black hole sciencedaily.com
14 bad 20 2 W ow fap.to
15 bad 20 8 Wedding Photographers did you love your wedding photographer self.WeddingPhotography
16 bad 20 38 This video reminds me of the hype in this sub for a Fallout 4 in New York Even though the style is a bit WAY off Most of the characters are a bit similar to the Fallout series youtube.com
17 good 20 6 Max the animatronic abandoned at Disney i.imgur.com
18 bad 20 2 Francia Expansions self.HistoricalWorldPowers
19 bad 20 10 When you learn autotools and discover all am in files youtube.com
20 good 20 1 Instincts i.imgur.com
21 bad 20 6 No more bing rewards for searches self.beermoney
22 bad 20 7 Launcher stops working when signing in HELP self.GTAV
23 bad 20 15 I m new to the gym amp changing my diet but I m loving it self.loseit
24 bad 20 9 Forbes The Chevy Bolt Tesla s Best News Yet forbes.com
25 bad 20 12 Forth Rail Bridge obscured by fog OS 720x720 x post r ScottishPhotos i.imgur.com
26 bad 20 2 Background score self.Daredevil
27 bad 20 4 To MIT Media Lab bitcoin-gr.org
28 good 20 3 Havana Street Art self.cigars
29 bad 20 4 Gear PRS Custom 24 self.Guitar
30 bad 20 34 US Calif Co own a home with SO not married for 2 years Now we are breaking up and she only wants to give me my original down pymt instead of buying me out self.legaladvice
31 good 20 6 There s been a fur der imgur.com
32 good 20 17 ICA Use A Test Website UI Greg Miaskiewicz 0 40 1 5 min gt 95 gt 500 self.HITsWorthTurkingFor
33 bad 20 8 balloon popping sound in 5 4 3 2 i.imgur.com
34 good 20 12 My attempt at recreating the Imperial Insignia in the new teaser trailer imgur.com
35 bad 20 10 This is the most important thing on the internet today youtu.be
36 bad 20 6 A R Kane and Dirty Beaches self.ambient
37 bad 20 10 What is a good hot air soldering for a hobbyist self.AskElectronics
38 bad 20 8 Falconshield This Is War 4 Freljord COLLAB draggles np.reddit.com
39 bad 20 3 Sacramento home appraiser sacramentovalleyappraisal.com
40 bad 20 10 FIRST WALK OFF OF THE YEAR UPVOTE PARTY r CHICubs reddit.com
41 bad 20 23 Ukrainian notables politicians journalists react to murder of Kalashnikov and Buzhina See their fb posts in link Most popular opinion Putin did it pravda.com.ua
42 bad 20 4 Maclock watch for sale self.MacMiller
43 bad 20 2 me irl imgur.com
44 bad 20 7 Looking to recruit EU Aliance Outland BCH self.wowraf
45 bad 20 10 Trying to understand potential of the restaurant food truck industry self.Entrepreneur
46 bad 20 3 Democracy Now Bitcoin bitcoin-gr.org
47 good 20 4 Got pulled over today self.motorcycles
48 bad 20 7 ESPN Reporter Britt McHenry is a shitlord liveleak.com
49 bad 20 15 I m looking for places to buy local craft beers by the bottle Any suggestions self.dayton
50 bad 20 5 No username in login screen self.techsupport
51 good 20 7 Ads for the upcoming election in Alberta youtube.com
52 bad 20 11 Michael Savage Hillary Clinton s Looks Alone Could Sink The Campaign rightwingwatch.org
53 good 20 4 Another from People Magazine imgur.com
54 bad 20 10 ps4 LF2 CE CROTA CP need sword bearer Psn blackskyes self.Fireteams
55 bad 20 4 Dealing with Cho mid self.summonerschool
56 bad 20 3 C 3P0 Question self.XWingTMG
57 bad 20 10 DUNGEON HUNTER 5 HACK AND CHEATS TOOL RESOURCES GENERATOR UTILITY self.maxgiron
58 bad 20 11 I m searching for mature anime genre is not too relevant self.Animesuggest
59 bad 20 3 Cassie s hair self.MortalKombat
60 bad 20 6 Ha Ha Ha Ha 0 02 youtube.com
61 bad 20 8 Can someone make a simple I hope profile self.ChromaProfiles
62 bad 20 5 I can t reinstall SMITE self.Smite
63 bad 20 19 TIL That in 1974 a US Army Private stole a helicopter and landed it at the White House Twice en.wikipedia.org
64 good 20 2 Rottweiler pup imgur.com
65 good 20 4 New stealth Bonnaroo additions self.bonnaroo
66 bad 20 5 Any Houston Rockets fans here self.Eugene
67 good 20 10 Pictures of Tube Televisions the moment they re Turned Off imgur.com
68 bad 20 10 Camping this summer with a 9 5 month old Tips self.beyondthebump
69 good 20 15 Toss this into your old 1 4 7 based packs to improve falling block rendering minecraftforum.net
70 good 20 13 StreetPass Thank You Bundle takes EXTRA 1 off the new Mii Plaza games technologytell.com
71 good 20 11 First Step of Becoming a Real Sissy M m F m self.gonewildstories
72 good 20 10 Big BART delays after apparent suicide at Civic Center station sfgate.com
73 bad 20 5 tornado sirens in northwest suburbs self.Minneapolis
74 good 20 2 Me irl i.imgur.com
75 bad 20 13 Kimi Raikkonen hints at Ferrari stay in 2016 if the team wants him espn.co.uk
76 bad 20 16 If you had to Explain a game of LoL in one Analogy what would it be self.leagueoflegends
77 good 20 6 What is your guilty pleasure song self.AskReddit
78 bad 20 14 Suggestion Don t unlock the next unit until you can afford to buy it self.swarmsim
79 good 20 1 Metal imgur.com
80 bad 20 6 TheFatRat Time Lapse House Electro 2015 soundcloud.com
81 bad 20 22 Hey Reddit is it true that the silver fillings dentists put in our teeth contains mercury and could be harmful over time self.AskReddit
82 good 20 5 request Will Photoshop For Pizza self.RandomActsOfPizza
83 bad 20 13 My wife likes to put small things on the big thing shelf fixed i.imgur.com
84 bad 20 14 Mi novio es de M xico y quiero practicar con l pero estoy nerviosa self.SpanishImmersion
85 bad 20 5 Anybody have experience with PageFair self.adops
86 good 20 23 Osoba na snimci kriva je za nekoliko kra a bicikala po Zagrebu pa tako i maznuo jedan mom frendu Ako ga prepoznajete javite youtube.com
87 bad 20 9 Grogheads com Brother Against Brother The AAR Part 1 grogheads.com
88 good 20 29 St Bernadette Soubirous Feast April 16th outside of France One of the many Incorrupt Saints She looked upon the face of the Blessed Virgin Mary 18 times at Lourdes en.lourdes-france.org
89 bad 20 18 My mom texted asking what food I want in the fridge including my preferred flavor of Greek yogurt i.imgur.com
90 good 20 18 Spoilers All If Dragon Age II had a subtitle s like Origins and Inquisition what would it be self.dragonage
91 bad 20 5 Best hidden places on campus self.IndianaUniversity
92 bad 20 8 Rematch Snack Santa Snackta is wonderful and generous redditgifts.com
93 bad 20 13 Half Of Yemen s Population Is Going Hungry As Violence Worsens UN Says huffingtonpost.com
94 good 20 6 RBC Heritage Round 1 Live Thread self.dfsports
95 bad 20 7 Feeling full all the time swollen liver self.stopdrinking
96 bad 20 9 Recent events for MLP Zero Hard treasure trail completed self.TrueLionhearts
97 bad 20 5 PS4 LF2 for NF Weekly self.Fireteams
98 bad 20 5 Stardust with Ocular by yitaku soundcloud.com
99 bad 20 4 PC M9 Bayonet Slaughter self.GlobalOffensiveTrade
100 good 20 2 Spare food self.tampa

File diff suppressed because it is too large Load Diff

View File

@ -76,9 +76,35 @@ border-color: #ddd;
<img src="http://www.reddit.com/captcha/${iden}" alt="captcha" width="200"/> <img src="http://www.reddit.com/captcha/${iden}" alt="captcha" width="200"/>
</c:if> </c:if>
<br><br> <br><br>
<span class="col-sm-3"><button type="submit" class="btn btn-primary">Post</button></span> <span class="col-sm-3"><button id="submitbtn" type="submit" class="btn btn-primary">Post</button></span>
</div> </div>
</form> </form>
<div>
<button id="checkbtn" class="btn btn-default disabled" onclick="predicateResponse()">Predicate Response</button>
<span id="prediction"></span>
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
<script type="text/javascript">
// Whenever an input changes, give the prediction button the same
// "disabled" state that the submit button currently has.
$("input").change(function() {
    var submitDisabled = $("#submitbtn").hasClass("disabled");
    $("#checkbtn").toggleClass("disabled", submitDisabled);
});
function predicateResponse(){
var title = $('input[name="title"]').val();
var domain = $('input[name="url"]').val();
domain = $('<a>').prop('href', domain).prop('hostname');
console.log(domain);
$.post("<c:url value="/predicatePostResponse"></c:url>",{title: title, domain: domain} ,function(data){
$("#prediction").addClass("alert alert-info").html(data.replace('{','').replace('}',''));
});
}
</script>
</div>
</div> </div>
</body> </body>
</html> </html>

View File

@ -0,0 +1,28 @@
package org.baeldung.classifier;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import org.baeldung.reddit.classifier.RedditClassifier;
import org.baeldung.reddit.classifier.RedditDataCollector;
import org.junit.Before;
import org.junit.Test;
/**
 * Trains a {@link RedditClassifier} from the collected training file and checks
 * its accuracy on the collected test file.
 */
public class RedditClassifierTest {

    private RedditClassifier trainedClassifier;

    /** Creates a fresh classifier and trains it before each test. */
    @Before
    public void init() throws IOException {
        trainedClassifier = new RedditClassifier();
        trainedClassifier.trainClassifier(RedditDataCollector.TRAINING_FILE);
    }

    /** The evaluated accuracy must be above 0.8. */
    @Test
    public void testClassifier() throws IOException {
        final double accuracy = trainedClassifier.evaluateClassifier();
        System.out.println("Accuracy = " + accuracy);
        assertTrue(accuracy > 0.8);
    }
}