读中文的WORD和TXT的简历,保存到类Resume中

作者:源码世界 | 时间:2014-03-13 | 分类:程序员碎语 | 评论:0 | 浏览:6127
package org.fuxin;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeMap;

/**
 * Batch driver that turns the Word ({@code doc}) and plain-text ({@code txt})
 * resumes found under a directory into {@code Resume} objects.
 *
 * <p>Each Word document is first handed to {@code Word2Txt.extractDoc} together
 * with a scratch text file, and that scratch file is then parsed by
 * {@code Txt2Resume.readStringFile}. Plain-text resumes are parsed directly.
 */
public class BatchDoc2Resume {

	/** File name of the scratch text file used while converting Word documents. */
	private static final String TEMP_FILE_NAME = "tmp.txt";

	/**
	 * Reads every resume under the hard-coded resume directory and prints the
	 * elapsed wall-clock time in milliseconds.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		long start = System.currentTimeMillis();

		String sourceDir="e:\\resume";
		String keywordFile="e:\\resume\\keyword.csv";
		// The returned list is not used here; the call is made for the
		// per-resume output that getAllResume prints while it works.
		getAllResume(sourceDir, keywordFile);

		long end = System.currentTimeMillis();
		System.out.println((end-start)+"ms");
	}

	/**
	 * Builds a {@code Resume} for every {@code doc} and {@code txt} file that
	 * {@code FileViewer.getListFiles} reports for {@code sourceDir}.
	 *
	 * <p>The scratch file is created inside {@code sourceDir} (previously it was
	 * always {@code e:\resume\tmp.txt} regardless of the argument), and it is
	 * skipped when the text files are processed so that it is not parsed as a
	 * resume of its own.
	 *
	 * @param sourceDir   directory that is searched for resumes
	 * @param keywordFile file passed to {@code ReadKeyword.readFromFile}; the
	 *                    resulting map is forwarded to the resume parser
	 * @return the parsed resumes, Word documents first, then text files
	 */
	private static ArrayList<Resume> getAllResume(String sourceDir, String keywordFile) {
		ArrayList<Resume> rtResumeList = new ArrayList<Resume>();
		TreeMap<String,String> keywordMap = ReadKeyword.readFromFile(keywordFile);

		// Scratch file lives next to the resumes instead of at a fixed location.
		String tmpFile = new File(sourceDir, TEMP_FILE_NAME).getPath();

		// NOTE(review): the third argument presumably enables recursion into
		// sub-directories - confirm against FileViewer.
		ArrayList<String> fileList = FileViewer.getListFiles(sourceDir, "doc", true);
		if (fileList.size() == 0) {
			sop(sourceDir+"下没有后缀为doc的简历");
		}

		for (String docFile : fileList) {
			sop(docFile);
			Word2Txt.extractDoc(docFile, tmpFile);
			Resume myrsm = Txt2Resume.readStringFile(tmpFile, keywordMap);
			sop(myrsm);
			rtResumeList.add(myrsm);
		}

		// Kept from the original. NOTE(review): this only matters if FileViewer
		// hands back a list it reuses between calls - confirm before removing.
		fileList.clear();
		fileList = FileViewer.getListFiles(sourceDir, "txt", true);
		if (fileList.size() == 0) {
			sop(sourceDir+"下没有后缀为txt的简历");
		}

		for (String filename : fileList) {
			if (isTempFile(filename)) {
				continue;
			}
			sop(filename);
			Resume myrsm = Txt2Resume.readStringFile(filename, keywordMap);
			sop(myrsm);
			rtResumeList.add(myrsm);
		}

		sop("共读取了"+rtResumeList.size()+"份简历!");
		return rtResumeList;
	}

	/**
	 * Tells whether {@code path} names the scratch file.
	 *
	 * <p>Only the last path component is compared, so a resume such as
	 * {@code mytmp.txt} is no longer skipped the way the former substring
	 * test on the whole path skipped it.
	 *
	 * @param path a file path as returned by {@code FileViewer.getListFiles}
	 * @return {@code true} when the file name is exactly {@code tmp.txt}
	 */
	private static boolean isTempFile(String path) {
		return TEMP_FILE_NAME.equals(new File(path).getName());
	}

	/**
	 * Prints {@code obj} to standard output followed by a line break.
	 *
	 * @param obj value to print
	 */
	static void sop(Object obj)
	{
		System.out.println(obj);
	}
}
标签:Java  
返回顶部
分享按钮