Latest Code here
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.FileOutputStream;
import java.io.BufferedOutputStream;
import java.net.URL;

/**
 * Downloads a single web resource and stores it on disk as {@code <new-filename>.pdf}.
 *
 * <p>Usage: {@code java FileDownloader <web-link> <new-filename>}. All failures are reported
 * on standard output; {@link #main(String[])} never throws for bad arguments or I/O errors.
 */
public class FileDownloader {

    /** Links beginning with this prefix are redirected to {@link #PUBLIC_BASE}. */
    private static final String INTERNAL_PREFIX = "http://172";

    /** Base address substituted for the host part of links matching {@link #INTERNAL_PREFIX}. */
    private static final String PUBLIC_BASE = "http://cl.thapar.edu/";

    /** Size, in bytes, of the copy buffer and of the output stream buffer. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Entry point; also invoked programmatically with a two-element array.
     *
     * @param args {@code args[0]} is the link to fetch, {@code args[1]} is the target file name
     *             without the {@code .pdf} extension. The array itself is not modified.
     */
    public static void main(String args[]) {
        // Exactly two arguments are required and the link must not be empty.
        if (args == null || args.length != 2 || args[0] == null || args[0].equals("")) {
            System.out.println("Usage: <web-link> <new-filename>");
            return;
        }

        String link = toPublicLink(args[0]);
        System.out.println("FileDownloader: " + link + " " + args[1]);

        BufferedInputStream in = null;
        BufferedOutputStream bout = null;
        try {
            URL url = new URL(link);
            // Open the source first so that a bad link does not leave an empty output file behind.
            in = new BufferedInputStream(url.openStream());
            bout = new BufferedOutputStream(new FileOutputStream(args[1] + ".pdf"), BUFFER_SIZE);

            byte[] data = new byte[BUFFER_SIZE];
            int count;
            while ((count = in.read(data, 0, BUFFER_SIZE)) >= 0) {
                bout.write(data, 0, count);
            }
            // Flush inside the try block so that a failing final write is still reported.
            bout.flush();
        } catch (IOException e) {
            System.out.println(e.toString());
        } finally {
            closeQuietly(bout);
            closeQuietly(in);
        }
    }

    /**
     * Rewrites a link whose host starts with {@code 172} (presumably an address that is only
     * reachable from inside the campus network - TODO confirm) so that it points at
     * {@link #PUBLIC_BASE} instead. The path after the host is kept. Any other link, or one
     * without a path, is returned unchanged.
     */
    private static String toPublicLink(String link) {
        if (!link.startsWith(INTERNAL_PREFIX)) {
            return link;
        }
        // Locate the end of the host part instead of assuming a fixed host-name length.
        int hostEnd = link.indexOf('/', "http://".length());
        if (hostEnd < 0) {
            return link;
        }
        return PUBLIC_BASE + link.substring(hostEnd + 1);
    }

    /** Closes {@code resource} if it is non-null, ignoring any failure raised by the close. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done here; the data was already flushed or the copy already failed.
        }
    }
}
//File: QPDownloader.java
import java.io.IOException;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.AbstractList;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.ArrayList;
import java.util.TreeSet;
import java.util.Iterator;

/** Display name and PDF link of one question paper. */
class CourseInfo {

    /**
     * Stores the pair and echoes it to standard output.
     *
     * @param na course name; {@code null} is stored as the empty string
     * @param li link to the paper; {@code null} is stored as the empty string
     */
    CourseInfo(String na, String li) {
        name = (na != null) ? na : "";
        link = (li != null) ? li : "";
        System.out.print(name + "\n" + link + "\n");
    }

    String link;
    String name;
}

/** Contains info for 1 exam season: its name, its page link and the papers listed on that page. */
class SeasonPage {

    /** First "http" up to the last "pdf" of the fragment (greedy). */
    private static final Pattern PDF_LINK = Pattern.compile("http.*pdf");

    /** Text between the first '>' and the last '<' of the fragment (greedy). */
    private static final Pattern TAG_TEXT = Pattern.compile(">.*<");

    // Shapes of anchor this pattern is meant to catch:
    //http://172.31.19.11/qp/esmay09/BH008.pdf" style="text-decoration: underline;"> BH008</
    //http://cl.thapar.edu/qp/EN0105.pdf">EN105</
    // NOTE(review): the quantifiers are greedy and '.' does not cross line breaks, so several
    // anchors on one physical line are returned as a single match.
    private static final Pattern PDF_ANCHOR = Pattern.compile("http.+pdf\".*>\\s*\\w*\\s*</a");

    String link;
    String name;

    /** Papers found on this season's page, in page order. */
    ArrayList<CourseInfo> coursesInfo = new ArrayList<CourseInfo>();

    /** Sorted names of every course seen by any SeasonPage created so far (shared, static). */
    static TreeSet<String> courses = new TreeSet<String>();

    /**
     * Downloads the season page at {@code li} and records every PDF anchor found on it.
     * The name and link are echoed to standard output.
     *
     * @param na season name; {@code null} is stored as the empty string
     * @param li link to the season page; {@code null} is stored as the empty string
     */
    SeasonPage(String na, String li) {
        name = (na != null) ? na : "";
        link = (li != null) ? li : "";
        System.out.print(name + "\n" + link + "\n");

        //initialise coursesInfo
        String seasonPageHTML = (new DownloadHTML(link)).getHTML();
        Matcher anchors = PDF_ANCHOR.matcher(seasonPageHTML);
        while (anchors.find()) {
            setCourseInfo(anchors.group());
        }
        // System.out.println("========================================================");
    }

    /**
     * Extracts the PDF link and the anchor text from one matched anchor fragment and, when both
     * are present, appends a {@link CourseInfo} to {@link #coursesInfo} and adds the course name
     * to {@link #courses}. The course name is also printed.
     *
     * @param input fragment of HTML containing one PDF anchor
     */
    void setCourseInfo(String input) {
        Matcher linkMatcher = PDF_LINK.matcher(input);
        Matcher textMatcher = TAG_TEXT.matcher(input);
        if (linkMatcher.find() && textMatcher.find()) {
            String course_link = linkMatcher.group();
            // Drop the surrounding '>' and '<' before trimming the label.
            String enclosed = textMatcher.group();
            String course_name = enclosed.substring(1, enclosed.length() - 1).trim();

            coursesInfo.add(new CourseInfo(course_name, course_link));
            System.out.println(course_name);
            courses.add(course_name);
        }
    }

    /** Returns the season name. */
    @Override
    public String toString() {
        return name;
    }

    /** Prints how many distinct course names have been collected so far. */
    static void printCourses() {
        System.out.println(courses.size());
    }
}

/** Downloads links from the html select box and saves each in a SeasonPage object. */
public class QPDownloader {

    private static final Pattern OPTION_TAG = Pattern.compile("<option.+option>");
    private static final Pattern HTML_LINK = Pattern.compile("http.*html");
    private static final Pattern TAG_TEXT = Pattern.compile(">.*<");

    /** Characters stripped from the front of an option match (the length of "<option value="). */
    private static final int OPTION_PREFIX_LENGTH = 14;

    /** Characters stripped from the end of an option match (the length of "/option>"). */
    private static final int OPTION_SUFFIX_LENGTH = 8;

    /** One entry per exam season listed on the index page, in page order. */
    ArrayList<SeasonPage> seasonPagesInfo = new ArrayList<SeasonPage>();

    /**
     * Downloads the question-paper index page and builds one {@link SeasonPage} (which in turn
     * downloads its own page) for every {@code <option>} element that carries an html link.
     */
    QPDownloader() {
        String input = (new DownloadHTML("http://cl.thapar.edu/library_qp.html")).getHTML();
        Matcher options = OPTION_TAG.matcher(input);
        while (options.find()) {
            int start = options.start() + OPTION_PREFIX_LENGTH;
            int end = options.end() - OPTION_SUFFIX_LENGTH;
            if (start > end) {
                // Match is too short to hold a link (e.g. "<option></option>"); nothing to extract.
                continue;
            }
            //for each page full of pdf links, read pdf link and its name in anchor tag.
            setExamSeasonInfo(input.substring(start, end));
        }
        // printExamSeasonInfo();
        // SeasonPage.printCourses();
    }

    /**
     * Extracts the season link and label from one trimmed option fragment and, when both are
     * present, appends a new {@link SeasonPage} to {@link #seasonPagesInfo}.
     *
     * @param input option fragment, expected to still end with the '<' of the closing tag
     */
    void setExamSeasonInfo(String input) {
        Matcher linkMatcher = HTML_LINK.matcher(input);
        Matcher textMatcher = TAG_TEXT.matcher(input);
        if (linkMatcher.find() && textMatcher.find()) {
            String season_link = linkMatcher.group();
            String enclosed = textMatcher.group();
            String season_name = enclosed.substring(1, enclosed.length() - 1);
            seasonPagesInfo.add(new SeasonPage(season_name, season_link));
        }
    }

    /** Prints the name and link of every collected season, each pair followed by a blank line. */
    void printExamSeasonInfo() {
        for (SeasonPage season : seasonPagesInfo) {
            System.out.println(season.name);
            System.out.println(season.link);
            System.out.println("");
        }
    }

    public static void main(String[] args) {
        // The constructor does all the work (downloading and printing).
        new QPDownloader();
    }
}

/** Fetches the text behind a link once, at construction time, and keeps it in memory. */
class DownloadHTML {

    // Created eagerly so that getHTML() is safe even when the download fails immediately.
    private final StringBuffer strbuf = new StringBuffer();

    /**
     * Downloads {@code web_link} line by line. An {@link IOException} (including a malformed
     * link) is printed to standard output; whatever was read before the failure is kept.
     *
     * @param web_link address of the page to fetch
     */
    DownloadHTML(String web_link) {
        BufferedReader reader = null;
        try {
            // Here we give the URL for the Crawler
            URL url = new URL(web_link);
            System.setProperty("http.proxyHost", "");
            System.setProperty("http.proxyPort", "");
            URLConnection conn = url.openConnection();
            reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));

            // readLine() returns null only at end of stream. ready() merely says whether a read
            // would block, so looping on it can stop before a slow server has sent everything.
            String line;
            while ((line = reader.readLine()) != null) {
                strbuf.append(line).append("\n");
            }
        } catch (IOException e) {
            System.out.println(e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // The content has already been read; a failing close changes nothing.
                }
            }
        }
    }

    /** Returns the downloaded text, every line terminated by "\n"; empty if nothing was read. */
    String getHTML() {
        return strbuf.toString();
    }
}
//File: GUI_QPDownloader.java import java.awt.BorderLayout; import java.awt.Container; import java.awt.Component; import java.awt.event.ActionListener; import java.awt.event.ActionEvent; import java.awt.event.ItemListener; import java.awt.event.ItemEvent; import javax.swing.JFrame; import javax.swing.UIManager; import javax.swing.UnsupportedLookAndFeelException; import javax.swing.JScrollPane; import javax.swing.JPanel; import javax.swing.JTable; import javax.swing.JButton; import javax.swing.JCheckBox; import javax.swing.AbstractButton; import javax.swing.table.DefaultTableModel; import javax.swing.table.TableCellRenderer; import java.util.Vector; import java.util.ArrayList; class CheckBoxRenderer implements TableCellRenderer, ItemListener { public CheckBoxRenderer() { } public void itemStateChanged(ItemEvent itemEvent) { AbstractButton abstractButton = (AbstractButton)itemEvent.getSource(); int state = itemEvent.getStateChange(); if (state == ItemEvent.SELECTED) { //cb.setSelected(!cb.isSelected()); System.out.println(course + " checkbox change"); } } public Component getTableCellRendererComponent( JTable table, Object ob, boolean isSelected, boolean hasFocus, int row, int column) { cb = (JCheckBox)ob; course = cb.getText(); if(cb.getText().equals("")){//if label is "" cb.setEnabled(false); } cb.addItemListener(this); return cb; } JCheckBox cb; String course; } public class GUI_QPDownloader extends JFrame{ JButton dl_btn = new JButton("Download Selected"); JTable table; QPDownloader qpdown; public GUI_QPDownloader(String name) { super(name); //setResizable(false); } public void addComponentsToPane(final Container pane) { qpdown = new QPDownloader(); final Object[] columnNames = new String[qpdown.seasonPagesInfo.size()]; int i=-1; for(SeasonPage sp: qpdown.seasonPagesInfo){ columnNames[++i] = sp.toString(); } System.out.println(SeasonPage.courses.size()+" "+qpdown.seasonPagesInfo.size()); final Object[][] data = new 
JCheckBox[SeasonPage.courses.size()][qpdown.seasonPagesInfo.size()]; final boolean[][] editable = new boolean[SeasonPage.courses.size()][qpdown.seasonPagesInfo.size()]; int r=-1; for(String course: SeasonPage.courses){//for each course or row int c = -1; ++r; //System.out.println(course); for(SeasonPage season: qpdown.seasonPagesInfo){//for each season or col //if course exits in season.coursesInfo.name then add JCheckBox boolean exists = false; for(CourseInfo ci: season.coursesInfo){ if((ci.name).equals(course)){ exists = true; break; } } if(exists){ data[r][++c]=new JCheckBox(course, false); editable[r][c] = true; }else{ data[r][++c]=new JCheckBox("", false); editable[r][c] = false; } } } //System.out.println((new JCheckBox()).getClass().toString() + " " + data[0][0].getClass().toString()); final ArrayList v = new ArrayList(SeasonPage.courses); table = new JTable(){ public TableCellRenderer getCellRenderer(int row, int column) { return new CheckBoxRenderer(); } }; table.setModel(new DefaultTableModel(data, columnNames){ public boolean isCellEditable(int r, int c) { return editable[r][c]; } public Class getColumnClass(int columnIndex){ return Boolean.class;//(new JCheckBox()).getClass(); } public Object getValueAt(int row, int col){ return data[row][col]; } public void setValueAt(Object value, int row, int col) { ((JCheckBox)data[row][col]).setSelected((Boolean)value); fireTableCellUpdated(row, col); } }); table.setFillsViewportHeight(true); //controls JPanel controls = new JPanel(); dl_btn.addActionListener(new ActionListener(){ public void actionPerformed(ActionEvent e){ System.out.println("Download btn event"); int r=-1; for(String course: SeasonPage.courses){//for each course or row int c = -1; ++r; for(SeasonPage season: qpdown.seasonPagesInfo){//for each season or col if(((JCheckBox)data[r][++c]).isSelected()){ for(CourseInfo ci : season.coursesInfo){ if(course.equals(ci.name)){ FileDownloader.main(new String[]{ci.link, course+" "+season.name}); break; } } } 
} } } }); controls.add(dl_btn); pane.setLayout(new BorderLayout()); //pane.add(table.getTableHeader(), BorderLayout.PAGE_START); pane.add(new JScrollPane(table), BorderLayout.CENTER); pane.add(controls, BorderLayout.SOUTH); } public static void main(String[] args){ // Use an appropriate Look and Feel try { UIManager.setLookAndFeel("com.sun.java.swing.plaf.windows.WindowsLookAndFeel"); //UIManager.setLookAndFeel("javax.swing.plaf.metal.MetalLookAndFeel"); } catch (UnsupportedLookAndFeelException ex) { ex.printStackTrace(); } catch (IllegalAccessException ex) { ex.printStackTrace(); } catch (InstantiationException ex) { ex.printStackTrace(); } catch (ClassNotFoundException ex) { ex.printStackTrace(); } // Turn off metal's use of bold fonts UIManager.put("swing.boldMetal", Boolean.FALSE); //Schedule a job for the event dispatch thread: //creating and showing this application's GUI. javax.swing.SwingUtilities.invokeLater(new Runnable() { public void run() { //Create and set up the window. GUI_QPDownloader frame = new GUI_QPDownloader("TU QPDownloader"); frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); //Set up the content pane. frame.addComponentsToPane(frame.getContentPane()); //Display the window. frame.pack(); frame.setVisible(true); } }); } }
No comments:
Post a Comment