Files
lan-manager/web/node_modules/@antv/algorithm/es/utils/data-preprocessing.js
openclaw 0a5f6a8047 Initial commit: Lan-manager project code
- Go backend (server/)
- Frontend (web/, server/static/)
- Database and deployment files
- Scripts and docs

Co-Authored-By: 狸花猫/Claude-Qwen3.6-Plus 🐾
2026-04-20 00:52:58 +08:00

112 lines
3.5 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { uniq } from '@antv/util';
import { DistanceType } from '../types';
import Vector from './vector';
/**
* 获取数据中所有的属性及其对应的值
* @param dataList 数据集
* @param involvedKeys 参与计算的key集合
* @param uninvolvedKeys 不参与计算的key集合
*/
export var getAllKeyValueMap = function getAllKeyValueMap(dataList, involvedKeys, uninvolvedKeys) {
var keys = [];
// 指定了参与计算的keys时使用指定的keys
if (involvedKeys === null || involvedKeys === void 0 ? void 0 : involvedKeys.length) {
keys = involvedKeys;
} else {
// 未指定抽取的keys时提取数据中所有的key
dataList.forEach(function (data) {
keys = keys.concat(Object.keys(data));
});
keys = uniq(keys);
}
// 获取所有值非空的key的value数组
var allKeyValueMap = {};
keys.forEach(function (key) {
var value = [];
dataList.forEach(function (data) {
if (data[key] !== undefined && data[key] !== '') {
value.push(data[key]);
}
});
if (value.length && !(uninvolvedKeys === null || uninvolvedKeys === void 0 ? void 0 : uninvolvedKeys.includes(key))) {
allKeyValueMap[key] = uniq(value);
}
});
return allKeyValueMap;
};
/**
* one-hot编码数据特征提取
* @param dataList 数据集
* @param involvedKeys 参与计算的的key集合
* @param uninvolvedKeys 不参与计算的key集合
*/
export var oneHot = function oneHot(dataList, involvedKeys, uninvolvedKeys) {
// 获取数据中所有的属性/特征及其对应的值
var allKeyValueMap = getAllKeyValueMap(dataList, involvedKeys, uninvolvedKeys);
var oneHotCode = [];
if (!Object.keys(allKeyValueMap).length) {
return oneHotCode;
}
// 获取所有的属性/特征值
var allValue = Object.values(allKeyValueMap);
// 是否所有属性/特征的值都是数值型
var isAllNumber = allValue.every(function (value) {
return value.every(function (item) {
return typeof item === 'number';
});
});
// 对数据进行one-hot编码
dataList.forEach(function (data, index) {
var code = [];
Object.keys(allKeyValueMap).forEach(function (key) {
var keyValue = data[key];
var allKeyValue = allKeyValueMap[key];
var valueIndex = allKeyValue.findIndex(function (value) {
return keyValue === value;
});
var subCode = [];
// 如果属性/特征所有的值都能转成数值型不满足分箱则直接用值todo: 为了收敛更快,需做归一化处理)
if (isAllNumber) {
subCode.push(keyValue);
} else {
// 进行one-hot编码
for (var i = 0; i < allKeyValue.length; i++) {
if (i === valueIndex) {
subCode.push(1);
} else {
subCode.push(0);
}
}
}
code = code.concat(subCode);
});
oneHotCode[index] = code;
});
return oneHotCode;
};
/**
* getDistance获取两个元素之间的距离
* @param item
* @param otherItem
* @param distanceType 距离类型
* @param graphData 图数据
*/
export var getDistance = function getDistance(item, otherItem, distanceType, graphData) {
if (distanceType === void 0) {
distanceType = DistanceType.EuclideanDistance;
}
var distance = 0;
switch (distanceType) {
case DistanceType.EuclideanDistance:
distance = new Vector(item).euclideanDistance(new Vector(otherItem));
break;
default:
break;
}
return distance;
};
export default {
getAllKeyValueMap: getAllKeyValueMap,
oneHot: oneHot,
getDistance: getDistance
};