项目:lodreclib
文件:Evaluator.java
/**
 * Computes the pairwise Jaccard similarity between all items and stores every
 * value above MIN_SIM in {@code itemSim}, keyed by the smaller item id.
 * Items without feature vectors in {@code map_item_intFeatures} get similarity 0.
 */
private void computeItemSim() {
	List<Integer> sortedItems = new ArrayList<Integer>(items);
	Collections.sort(sortedItems);
	for (int i = 0; i < sortedItems.size() - 1; i++) {
		int id1 = sortedItems.get(i);
		this.itemSim.put(id1, new TIntFloatHashMap());
		for (int j = i + 1; j < sortedItems.size(); j++) {
			int id2 = sortedItems.get(j);
			float val = 0;
			// && (short-circuit) instead of the original non-short-circuit &:
			// skip the second lookup when the first already failed
			if (map_item_intFeatures.containsKey(id1)
					&& map_item_intFeatures.containsKey(id2))
				val = cmpJaccardSim(this.map_item_intFeatures.get(id1).keySet(),
						this.map_item_intFeatures.get(id2).keySet());
			if (val > MIN_SIM) {
				itemSim.get(id1).put(id2, val);
			}
		}
	}
}
项目:wikit
文件:SimUtils.java
/**
 * Normalize a vector to unit length.
 *
 * @param X the vector to normalize
 * @return a new vector scaled to unit L2 norm, or {@code X} itself when its
 *         norm is zero (nothing to scale)
 */
public static TIntFloatMap normalizeVector(TIntFloatMap X) {
    double sumSquares = 0.0;
    for (double component : X.values()) {
        sumSquares += component * component;
    }
    if (sumSquares == 0.0) {
        return X;
    }
    double norm = Math.sqrt(sumSquares);
    TIntFloatHashMap unit = new TIntFloatHashMap();
    for (int id : X.keys()) {
        unit.put(id, (float) (X.get(id) / norm));
    }
    return unit;
}
项目:openimaj
文件:HoughCircles.java
/**
 * Construct with the given parameters.
 *
 * @param minRad
 *            minimum search radius (values &lt;= 0 are clamped to 1)
 * @param maxRad
 *            maximum search radius
 * @param radIncrement
 *            amount to increment search radius by between min and max.
 * @param nDegree
 *            number of degree increments
 */
public HoughCircles(int minRad, int maxRad, int radIncrement, int nDegree) {
	super();
	this.minRad = minRad;
	if (this.minRad <= 0)
		this.minRad = 1;
	this.maxRad = maxRad;
	this.radmap = new TIntObjectHashMap<TIntObjectHashMap<TIntFloatHashMap>>();
	this.radIncr = radIncrement;
	// use the clamped this.minRad (not the raw parameter) so that the table
	// size and the radii below agree when minRad <= 0 was passed in
	this.nRadius = (this.maxRad - this.minRad) / this.radIncr;
	this.nDegree = nDegree;
	this.cosanglemap = new float[nRadius][nDegree];
	this.sinanglemap = new float[nRadius][nDegree];
	for (int radIndex = 0; radIndex < this.nRadius; radIndex++) {
		for (int angIndex = 0; angIndex < nDegree; angIndex++) {
			final double ang = angIndex * (2 * PI / nDegree);
			final double rad = this.minRad + (radIndex * this.radIncr);
			this.cosanglemap[radIndex][angIndex] = (float) (rad * cos(ang));
			this.sinanglemap[radIndex][angIndex] = (float) (rad * sin(ang));
		}
	}
}
项目:fnlp
文件:KMeansWordCluster.java
/**
 * Squared-Euclidean distance between sparse vector {@code sv} and the center
 * of cluster {@code n}. The center is stored as an unnormalized sum of member
 * vectors, so every stored coordinate is divided by the member count first.
 *
 * @param n cluster index
 * @param sv sparse vector to compare against the cluster center
 * @param baseDistance precomputed squared norm of the unnormalized center
 * @return the accumulated squared distance
 */
private float distanceEuclidean(int n,HashSparseVector sv,float baseDistance) {
HashSparseVector center = classCenter.get(n);
int count = classCount.get(n);
// start from ||center/count||^2, then correct only the dimensions sv touches
float dist = baseDistance / (count * count);
TIntFloatHashMap data = center.data;
TIntFloatIterator it = sv.data.iterator();
while (it.hasNext()) {
it.advance();
int key = it.key();
if (!data.containsKey(key)) {
// dimension absent from the center: contributes sv_k^2
dist += it.value() * it.value();
}
else {
// replace the center-only term temp^2 with (sv_k - temp)^2
float temp = data.get(key) / count;
dist -= temp * temp;
dist += (it.value() - temp) * (it.value() - temp);
}
}
return dist;
}
项目:fnlp
文件:KMeansWordCluster.java
/**
 * Incrementally updates the cached base distance of class {@code classid}
 * after {@code vector} joins it: dimensions the center does not yet have add
 * their squared value, shared dimensions are replaced by the squared delta.
 *
 * @param classid index of the class whose base distance is updated
 * @param vector the sparse vector being folded into the class
 */
private void updateBaseDist(int classid,HashSparseVector vector) {
float base = baseDistList.get(classid);
TIntFloatHashMap center = classCenter.get(classid).data;
TIntFloatIterator it = vector.data.iterator();
while (it.hasNext()) {
it.advance();
if (!center.containsKey(it.key())) {
base += it.value() * it.value();
}
else {
// swap the old center-only contribution for the new delta term
float temp = center.get(it.key());
base -= temp * temp;
base += (it.value() - temp) * (it.value() - temp);
}
}
baseDistList.set(classid,base);
}
项目:fnlp
文件:WordCluster.java
/**
 * Looks up the stored weight between clusters c1 and c2. The pair is keyed by
 * (smaller id -> larger id); returns 0 when no row exists for the smaller id
 * (a missing key inside the row also yields Trove's 0 no-entry value).
 */
private float getweight(int c1,int c2) {
    int lo = Math.min(c1, c2);
    int hi = Math.max(c1, c2);
    TIntFloatHashMap row = wcc.get(lo);
    return (row == null) ? 0 : row.get(hi);
}
项目:fnlp
文件:MyArrays.java
/**
 * Returns the indices whose cumulative energy (squared value) exceeds
 * {@code thres} of the total.
 *
 * @param data sparse vector
 * @param thres cumulative energy-ratio threshold in (0,1]
 * @return int[][] — row 0: indices above the threshold (sorted by descending
 *         |value|); row 1: the remaining indices
 */
public static int[][] getTop(TIntFloatHashMap data,float thres) {
	int[] idx = sort(data);
	int i;
	float total = 0;
	float[] cp = new float[idx.length];
	for (i = idx.length; i-- > 0;) {
		float v = data.get(idx[i]);
		cp[i] = v * v; // squared energy; cheaper than Math.pow(v, 2)
		total += cp[i];
	}
	float ratio = 0;
	for (i = 0; i < idx.length; i++) {
		ratio += cp[i] / total;
		if (ratio > thres)
			break;
	}
	// split at i (i == idx.length when the threshold is never exceeded);
	// original had Arrays.copyOfRange(idx,i) which does not compile — the
	// two-bound form with 0 as the start is the intended call
	int[][] a = new int[2][];
	a[0] = Arrays.copyOfRange(idx, 0, i);
	a[1] = Arrays.copyOfRange(idx, i, idx.length);
	return a;
}
项目:fnlp
文件:MyHashSparseArrays.java
/**
* 得到总能量值大于thres的元素对应的下标
*
* @param data 稀疏向量
* @param thres
* @return 元素下标 int[][] 第一列表示大于阈值的元素 第二列表示小于阈值的元素
*/
public static int[][] getTop(TIntFloatHashMap data,idx.length);
return a;
}
项目:fnlp
文件:MyCollection.java
/**
 * Sorts the map entries by absolute value, descending.
 *
 * @param tmap sparse vector
 * @return the keys of tmap ordered by descending |value|
 */
public static int[] sort(TIntFloatHashMap tmap) {
    // stage the absolute values in a boxed map so the existing
    // sort(HashMap) helper can order the entries
    HashMap<Integer,Float> absValues = new HashMap<Integer,Float>();
    TIntFloatIterator it = tmap.iterator();
    while (it.hasNext()) {
        it.advance();
        absValues.put(it.key(), Math.abs(it.value()));
    }
    List<Entry> ordered = sort(absValues);
    int[] idx = new int[ordered.size()];
    int pos = 0;
    for (Entry entry : ordered) {
        idx[pos++] = (Integer) entry.getKey();
    }
    return idx;
}
项目:Zeppa-AppEngine
文件:SemSigUtils.java
/**
 * Normalizes the probability values in a vector so that they sum to 1.0.
 *
 * @param vector the vector to normalize
 * @return a new normalized vector; when the values sum to 0 the input is
 *         returned unchanged to avoid division by zero producing NaN/Infinity
 */
public static TIntFloatMap normalizeVector(TIntFloatMap vector)
{
	float total = 0;
	TFloatIterator iter = vector.valueCollection().iterator();
	while (iter.hasNext())
		total += iter.next();
	// guard: an all-zero (or empty) vector cannot be normalized
	if (total == 0)
		return vector;
	TIntFloatMap normalized = new TIntFloatHashMap(vector.size());
	TIntFloatIterator iter2 = vector.iterator();
	while (iter2.hasNext())
	{
		iter2.advance();
		normalized.put(iter2.key(), iter2.value() / total);
	}
	return normalized;
}
项目:ADW
文件:SemSigUtils.java
/**
 * Normalizes the probability values in a vector so that they sum to 1.0.
 *
 * @param vector the vector to normalize
 * @return a new normalized vector; when the values sum to 0 the input is
 *         returned unchanged to avoid division by zero producing NaN/Infinity
 */
public static TIntFloatMap normalizeVector(TIntFloatMap vector)
{
	float total = 0;
	TFloatIterator iter = vector.valueCollection().iterator();
	while (iter.hasNext())
		total += iter.next();
	// guard: an all-zero (or empty) vector cannot be normalized
	if (total == 0)
		return vector;
	TIntFloatMap normalized = new TIntFloatHashMap(vector.size());
	TIntFloatIterator iter2 = vector.iterator();
	while (iter2.hasNext())
	{
		iter2.advance();
		normalized.put(iter2.key(), iter2.value() / total);
	}
	return normalized;
}
项目:ADW
文件:SemSigUtilsTest.java
/**
 * Verifies that getSortedIndices orders the map keys by descending value:
 * values 10, 5, 2, 1 map back to keys 1, 2, 3, 0.
 */
@Test
public void testGetSortedIndices()
{
TIntFloatMap m = new TIntFloatHashMap();
m.put(0,1f);
m.put(1,10f);
m.put(2,5f);
m.put(3,2f);
int[] sorted = SemSigUtils.getSortedIndices(m);
assertEquals(4,sorted.length);
assertEquals(1,sorted[0]);
assertEquals(2,sorted[1]);
assertEquals(3,sorted[2]);
assertEquals(0,sorted[3]);
}
项目:fudannlp
文件:KMeansWordCluster.java
/**
 * Squared-Euclidean distance between sparse vector {@code sv} and the center
 * of cluster {@code n} (center stored as an unnormalized sum; divided by the
 * member count before comparison).
 *
 * NOTE(review): the parameter list was corrupted in extraction to
 * "(int n,float baseDistance)" while the body reads {@code sv}; the missing
 * {@code HashSparseVector sv} parameter is restored from the identical fnlp
 * KMeansWordCluster implementation.
 *
 * @param n cluster index
 * @param sv sparse vector to compare against the cluster center
 * @param baseDistance precomputed squared norm of the unnormalized center
 * @return the accumulated squared distance
 */
private float distanceEuclidean(int n, HashSparseVector sv, float baseDistance) {
	HashSparseVector center = classCenter.get(n);
	int count = classCount.get(n);
	// start from ||center/count||^2, then correct the dimensions sv touches
	float dist = baseDistance / (count * count);
	TIntFloatHashMap data = center.data;
	TIntFloatIterator it = sv.data.iterator();
	while (it.hasNext()) {
		it.advance();
		int key = it.key();
		if (!data.containsKey(key)) {
			dist += it.value() * it.value();
		} else {
			float temp = data.get(key) / count;
			dist -= temp * temp;
			dist += (it.value() - temp) * (it.value() - temp);
		}
	}
	return dist;
}
项目:fudannlp
文件:KMeansWordCluster.java
private void updateBaseDist(int classid,base);
}
项目:fudannlp
文件:WordCluster.java
/**
 * Computes all probabilities in a single pass to save time: converts the raw
 * word counts in {@code wordProb} to probabilities (dividing by the alphabet
 * size), builds a Cluster per word, and normalizes every pair count in
 * {@code pcc} by the same total.
 */
private void statisticProb() {
System.out.println("统计概率");
// total number of words; used as the normalizing denominator below
float totalword = alpahbet.size();
TIntFloatIterator it = wordProb.iterator();
while(it.hasNext()){
it.advance();
float v = it.value()/totalword;
it.setValue(v);
Cluster cluster = new Cluster(it.key(),v,alpahbet.lookupString(it.key()));
clusters.put(it.key(),cluster);
}
// normalize the pairwise co-occurrence counts in place
TIntObjectIterator<TIntFloatHashMap> it1 = pcc.iterator();
while(it1.hasNext()){
it1.advance();
TIntFloatHashMap map = it1.value();
TIntFloatIterator it2 = map.iterator();
while(it2.hasNext()){
it2.advance();
it2.setValue(it2.value()/totalword);
}
}
}
项目:fudannlp
文件:WordCluster.java
private float getweight(int c1,min;
if(c1<=c2){
max = c2;
min = c1;
}else{
max = c1;
min = c2;
}
float w;
TIntFloatHashMap map2 = wcc.get(min);
if(map2==null){
w = 0;
}else
w = map2.get(max);
return w;
}
项目:fudannlp
文件:MyArrays.java
/**
* 得到总能量值大于thres的元素对应的下标
*
* @param data 稀疏向量
* @param thres
* @return 元素下标 int[][] 第一列表示大于阈值的元素 第二列表示小于阈值的元素
*/
public static int[][] getTop(TIntFloatHashMap data,idx.length);
return a;
}
项目:fudannlp
文件:MyHashSparseArrays.java
/**
* 得到总能量值大于thres的元素对应的下标
*
* @param data 稀疏向量
* @param thres
* @return 元素下标 int[][] 第一列表示大于阈值的元素 第二列表示小于阈值的元素
*/
public static int[][] getTop(TIntFloatHashMap data,idx.length);
return a;
}
项目:fudannlp
文件:MyCollection.java
/**
* 由大到小排序
* @param map
* @return 数组下标
*/
public static int[] sort(TIntFloatHashMap tmap) {
HashMap<Integer,Math.abs(val));
}
it = null;
List<Entry> list = sort(map);
int[] idx = new int[list.size()];
Iterator<Entry> it1 = list.iterator();
int i=0;
while (it1.hasNext()) {
Entry entry = it1.next();
idx[i++] = (Integer) entry.getKey();
}
return idx;
}
项目:artista
文件:HoughCircles.java
/**
 * Construct with the given parameters.
 *
 * @param minRad minimum search radius (values &lt;= 0 are clamped to 1)
 * @param maxRad maximum search radius
 * @param radIncrement amount to increment the search radius by
 * @param nDegree number of degree increments
 *
 * NOTE(review): the parameter list was truncated in extraction to
 * "(int minRad,int nDegree)" while the body reads maxRad and radIncrement;
 * restored from the identical openimaj implementation.
 */
public HoughCircles(int minRad, int maxRad, int radIncrement, int nDegree) {
    super();
    this.minRad = minRad;
    if (this.minRad <= 0) this.minRad = 1;
    this.maxRad = maxRad;
    this.radmap = new TIntObjectHashMap<TIntObjectHashMap<TIntFloatHashMap>>();
    this.radIncr = radIncrement;
    this.nRadius = (maxRad - minRad) / this.radIncr;
    this.nDegree = nDegree;
    this.cosanglemap = new float[nRadius][nDegree];
    this.sinanglemap = new float[nRadius][nDegree];
    // precompute per-(radius, angle) x/y offsets used during accumulation
    for (int radIndex = 0; radIndex < this.nRadius; radIndex++) {
        for (int angIndex = 0; angIndex < nDegree; angIndex++) {
            double ang = angIndex * (2 * PI / nDegree);
            double rad = minRad + (radIndex * this.radIncr);
            this.cosanglemap[radIndex][angIndex] = (float) (rad * cos(ang));
            this.sinanglemap[radIndex][angIndex] = (float) (rad * sin(ang));
        }
    }
}
项目:lodreclib
文件:ItemPreProcessing.java
/**
 * Writes the item-feature map to {@code filename}, one item per line in the
 * format {@code id<TAB>fid:val fid:val ...}, with feature ids sorted ascending.
 *
 * @param filename output file path
 */
private void writeData(String filename) {
	// try-with-resources: the writer is closed even when an IOException occurs
	// (the original leaked it on failure)
	try (BufferedWriter writer = new BufferedWriter(new FileWriter(filename))) {
		for (int id : items) {
			StringBuilder buf = new StringBuilder();
			buf.append(id).append('\t');
			TIntFloatHashMap m = map_item_intFeatures.get(id);
			int[] fIDs = m.keys();
			Arrays.sort(fIDs);
			for (int fID : fIDs) {
				buf.append(fID).append(':').append(m.get(fID)).append(' ');
			}
			writer.append(buf);
			writer.newLine();
		}
		writer.flush();
	} catch (IOException ex) {
		System.out.println(ex.getMessage());
	}
}
项目:lodreclib
文件:UserModelRecommenderWorker.java
/**
 * Worker that trains and evaluates a per-user recommendation model.
 *
 * NOTE(review): the parameter list was garbled in extraction
 * ("Map&lt;Integer,Float&gt; userTrainRatings,Float&gt; userValRatings");
 * the second map's type has been restored as Map&lt;Integer,Float&gt; to match
 * the field assignments — confirm upstream.
 */
public UserModelRecommenderWorker(int u, BufferedWriter bw,
		TIntObjectHashMap<TIntFloatHashMap> map_item_intFeatures,
		Evaluator trainEval, Evaluator validEval, boolean silent, int topN,
		int num_features, List<Double> listC, List<Double> listEps,
		List<Integer> listSolverType, Map<Integer,Float> userTrainRatings,
		Map<Integer,Float> userValRatings, boolean implicit, int nValidNegEx,
		boolean addNegValidationEx, int timesRealFb, int minTrainEx,
		HashSet<Integer> items, float relUnknownItems, int topK, String metric) {
	this.topK = topK;
	this.metric = metric;
	this.u = u;
	this.bw = bw;
	this.map_item_intFeatures = map_item_intFeatures;
	this.trainEval = trainEval;
	this.validEval = validEval;
	this.silent = silent;
	this.topN = topN;
	this.num_features = num_features;
	this.listC = listC;
	this.listEps = listEps;
	this.listSolverType = listSolverType;
	this.userTrainRatings = userTrainRatings;
	this.userValRatings = userValRatings;
	this.implicit = implicit;
	this.relUnknownItems = relUnknownItems;
	this.nValidNegEx = nValidNegEx;
	this.timesRealFb = timesRealFb;
	this.minTrainEx = minTrainEx;
	this.items = items;
	this.addNegValidationEx = addNegValidationEx;
	originalTrainItems = new HashSet<Integer>();
}
项目:lodreclib
文件:UserProfileSimilarityRecommenderWorker.java
/**
 * Worker that recommends items by profile similarity for a single user.
 *
 * NOTE(review): the parameter list was truncated in extraction to
 * "(int u, Float evalRatingThresh)" while the body assigns seven more fields;
 * the missing parameters were reconstructed from the field assignments and the
 * sibling UserModelRecommenderWorker signature — confirm upstream.
 */
public UserProfileSimilarityRecommenderWorker(int u, HashSet<Integer> items,
		BufferedWriter bw, TIntObjectHashMap<TIntFloatHashMap> map_item_intFeatures,
		int topN, Map<Integer,Float> userTrainRatings, boolean implicit,
		Float evalRatingThresh) {
	this.u = u;
	this.items = items;
	this.bw = bw;
	this.map_item_intFeatures = map_item_intFeatures;
	this.topN = topN;
	this.userTrainRatings = userTrainRatings;
	this.implicit = implicit;
	this.evalRatingThresh = evalRatingThresh;
}
项目:lodreclib
文件:UserProfileSimilarityRecommenderWorker.java
/**
 * Cosine similarity between two sparse feature vectors.
 *
 * @return dot(v1,v2) / (||v1|| * ||v2||), or 0 when the key sets are disjoint
 */
private float cmpCosineSim(TIntFloatHashMap v1,TIntFloatHashMap v2) {
	// keys present in both vectors — the only ones contributing to the dot product
	TIntHashSet common = new TIntHashSet();
	common.addAll(v1.keySet());
	common.retainAll(v2.keySet());
	if (common.size() == 0)
		return 0;
	float dot = 0;
	TIntIterator it = common.iterator();
	while (it.hasNext()) {
		int k = it.next();
		dot += v1.get(k) * v2.get(k);
	}
	float sq1 = 0;
	for (int k1 : v1.keys())
		sq1 += v1.get(k1) * v1.get(k1);
	float sq2 = 0;
	for (int k2 : v2.keys())
		sq2 += v2.get(k2) * v2.get(k2);
	return dot / (float) (Math.sqrt(sq1) * Math.sqrt(sq2));
}
项目:lodreclib
文件:Evaluator.java
/**
 * Loads item feature vectors from a tab-separated file, one item per line in
 * the format {@code id<TAB>fid:val fid:val ...}. Only ids present in
 * {@code items} are kept; malformed lines are skipped (best-effort).
 *
 * @param file_name path of the feature file
 */
private void loadItemFeatureData(String file_name) {
	// try-with-resources: the reader is closed even on failure
	// (the original leaked it when an exception escaped the loop)
	try (BufferedReader br = new BufferedReader(new FileReader(file_name))) {
		String line;
		int count = 0;
		while ((line = br.readLine()) != null) {
			try {
				String[] vals = line.split("\t");
				int id = Integer.parseInt(vals[0]);
				if (items.contains(id)) {
					TIntFloatHashMap features = new TIntFloatHashMap();
					map_item_intFeatures.put(id, features);
					for (String token : vals[1].trim().split(" ")) {
						String[] pair = token.split(":");
						features.put(Integer.parseInt(pair[0]),
								Float.parseFloat(pair[1]));
					}
					count++;
				}
			} catch (Exception ex) {
				// deliberately best-effort: skip malformed lines silently
			}
		}
		logger.info("item metadata loaded for evaluation - " + count
				+ " items");
	} catch (IOException e) {
		e.printStackTrace();
	}
}
项目:lodreclib
文件:TextFileUtils.java
/**
 * Loads user ratings from a tab-separated file with lines of the form
 * {@code userId<TAB>itemId[<TAB>rating]}. A missing third column is treated
 * as an implicit positive rating of 1.
 *
 * @param file input file path
 * @return map from user id to (item id -> rating)
 */
public static TIntObjectHashMap<TIntFloatHashMap> loadInputUsersRatings(String file) {
	TIntObjectHashMap<TIntFloatHashMap> user_rating = new TIntObjectHashMap<TIntFloatHashMap>();
	// try-with-resources closes the reader even if parsing throws
	try (BufferedReader br = new BufferedReader(new FileReader(file))) {
		String line;
		while ((line = br.readLine()) != null) {
			String[] vals = line.split("\t");
			// two columns means no explicit rating -> implicit feedback (1)
			float rate = (vals.length == 2) ? 1 : Float.parseFloat(vals[2]);
			int user_id = Integer.parseInt(vals[0]);
			int item_id = Integer.parseInt(vals[1]);
			user_rating.putIfAbsent(user_id, new TIntFloatHashMap());
			user_rating.get(user_id).put(item_id, rate);
		}
	} catch (Exception e) {
		e.printStackTrace();
	}
	return user_rating;
}
项目:lodreclib
文件:TextFileUtils.java
/**
 * Loads user ratings from a tab-separated file ({@code userId<TAB>itemId<TAB>rating})
 * into {@code user_rating}, collecting every distinct rating value in {@code labels}.
 *
 * NOTE(review): the original line "user_rating.putIfAbsent(user_id,rate);" lost
 * the "new TIntFloatHashMap())" argument and the following put(...) during
 * extraction; restored to mirror the sibling overload above.
 *
 * @param file input file path
 * @param user_rating output map: user id -> (item id -> rating)
 * @param labels output set of distinct rating values seen
 */
public static void loadInputUsersRatings(String file, TIntObjectHashMap<TIntFloatHashMap> user_rating, TFloatHashSet labels) {
	try (BufferedReader br = new BufferedReader(new FileReader(file))) {
		String line;
		while ((line = br.readLine()) != null) {
			String[] vals = line.split("\t");
			float rate = Float.parseFloat(vals[2]);
			int user_id = Integer.parseInt(vals[0]);
			int item_id = Integer.parseInt(vals[1]);
			user_rating.putIfAbsent(user_id, new TIntFloatHashMap());
			user_rating.get(user_id).put(item_id, rate);
			labels.add(rate);
		}
	} catch (Exception e) {
		e.printStackTrace();
	}
}
项目:lodreclib
文件:UserPathExtractorWorker.java
/**
 * Constructor.
 *
 * NOTE(review): the generic types in the parameter list were garbled in
 * extraction ("TObjectIntHashMap&lt;String&gt; path_index,String&gt; paths");
 * the {@code paths} parameter has been restored as
 * THashMap&lt;String,String&gt; to match {@code items_path_index} — confirm
 * against the original source.
 */
public UserPathExtractorWorker(int user_id, TIntFloatHashMap trainRatings,
		TIntFloatHashMap validationRatings, ArrayList<String> items_id,
		BufferedWriter train_file, BufferedWriter validation_file,
		BufferedWriter test_file, boolean normalize,
		THashMap<String,String> items_path_index, String path_file,
		TObjectIntHashMap<String> path_index, THashMap<String,String> paths,
		int user_items_sampling, float ratesThreshold,
		TIntObjectHashMap<TIntHashSet> items_link) {
	this.user_id = user_id;
	this.items_id = items_id;
	this.trainRatings = trainRatings;
	this.validationRatings = validationRatings;
	this.train_file = train_file;
	this.validation_file = validation_file;
	this.test_file = test_file;
	this.normalize = normalize;
	this.items_path_index = items_path_index;
	this.path_index = path_index;
	this.paths = paths;
	this.path_file = path_file;
	this.user_items_sampling = user_items_sampling;
	this.ratesThreshold = ratesThreshold;
	this.items_link = items_link;
}
项目:wikit
文件:LinkRelatedness.java
/**
 * Builds a weighted outlink vector for a page: each linked page id is weighted
 * by log(totalPages / inlinkCount(id)), an IDF-style score. When
 * {@code wlmExtended} is set, second-level links are folded in as well, each
 * weighted by its own IDF score times the weight of the first-level link that
 * reached it.
 *
 * @param links ids of the pages linked from the source page
 * @return vector mapping linked page id to its weight
 */
private TIntFloatMap makeOutlinkVector(TIntSet links) {
TIntFloatMap vector = new TIntFloatHashMap();
for (int wpId : links.toArray()) {
vector.put(wpId,(float) Math.log(1.0 * linkCache.getTotalPages() / linkCache
.getInlinks(wpId).size()));
}
if(wlmExtended) {
TIntFloatMap vector2 = new TIntFloatHashMap();
// also weigh in second-level links (extended WLM)
for (int id1 : links.toArray()) {
for (int id2 : linkCache.getOutlinks(id1).toArray()) {
double tfidf = Math.log(1.0 * linkCache.getTotalPages()
/ linkCache.getInlinks(id2).size());
// second-level weight is attenuated by the first-level link's weight
float w = (float) tfidf * vector.get(id1);
float old = 0;
if (vector2.containsKey(id2)) {
old = vector2.get(id2);
}
vector2.put(id2,old + w);
}
}
// NOTE(review): putAll overwrites first-level weights for ids present in
// both maps — presumably intended, but verify against callers
vector.putAll(vector2);
}
return vector;
}
项目:fnlp
文件:WordCluster.java
/**
 * Computes all probabilities in a single pass to save time: converts the raw
 * counts in {@code wordProb} to probabilities (dividing by {@code totalword}),
 * builds a Cluster per non-negative key, and normalizes every pair count in
 * {@code pcc} by the same total.
 */
private void statisticProb() {
System.out.println("统计概率");
TIntFloatIterator it = wordProb.iterator();
while(it.hasNext()){
it.advance();
float v = it.value()/totalword;
it.setValue(v);
int key = it.key();
// negative keys are skipped — presumably sentinel/special ids; verify
if(key<0)
continue;
// NOTE(review): the fudannlp twin passes (key, v, string) to this ctor;
// here only (key, string) is visible — confirm the Cluster ctor arity
Cluster cluster = new Cluster(key,alpahbet.lookupString(key));
clusters.put(key,cluster);
}
// normalize the pairwise co-occurrence counts in place
TIntObjectIterator<TIntFloatHashMap> it1 = pcc.iterator();
while(it1.hasNext()){
it1.advance();
TIntFloatHashMap map = it1.value();
TIntFloatIterator it2 = map.iterator();
while(it2.hasNext()){
it2.advance();
it2.setValue(it2.value()/totalword);
}
}
}
项目:fnlp
文件:WordCluster.java
/**
 * Probability-like weight stored for the cluster pair (c1, c2).
 *
 * @return the stored value, or 0 when c1 has no row (a missing c2 inside the
 *         row also yields Trove's 0 no-entry value)
 */
private float getProb(int c1,int c2) {
	TIntFloatHashMap map = pcc.get(c1);
	// reuse the row already fetched instead of the original's redundant
	// second pcc.get(c1) lookup
	return (map == null) ? 0f : map.get(c2);
}