jsdatachecker.min.js
9.95 KB
function ArrayUtils(){}function DataTypeConverter(){this._fields=[],this._numOfRows=0}function DataTypesUtils(){}ArrayUtils.TestAndSet=function(arr,key,object){return"undefined"==typeof arr?null:0==Array.isArray(arr)?null:("undefined"==typeof arr[key]&&(arr[key]=object),arr[key])},ArrayUtils.TestAndInitializeKey=function(obj,key,value){return"undefined"==typeof obj?null:("undefined"==typeof obj[key]&&(obj[key]=value),obj[key])},ArrayUtils.TestAndIncrement=function(arr,key){var exists=arr[key];return"undefined"==typeof exists&&(arr[key]=0),arr[key]++,arr},ArrayUtils.toFieldsArray=function(obj){var fields=[];return ArrayUtils.IteratorOverKeys(obj,function(field,key){field.key=key,fields.push(field)}),fields},ArrayUtils.IteratorOverKeys=function(arr,callback){for(var property in arr)if(arr.hasOwnProperty(property)){var item=arr[property];callback(item,property)}},ArrayUtils.FindMinMax=function(arr,fncompare){var max1=null,max2=null;for(var key in arr)null==max1||fncompare(arr[key],max1.value)?(max2=max1,max1={index:-1,key:key,value:arr[key]}):(null==max2||fncompare(arr[key],max2.value))&&(max2={index:-1,key:key,value:arr[key]});return{first:max1,second:max2}},ArrayUtils.isArray=function(arr){return Array.isArray(arr)?arr.length>0:!1},DataTypeConverter.TYPES={TEXT:{value:0,name:"TEXT"},CODE:{value:1,name:"CODE"},NUMBER:{value:2,name:"NUMBER"},OBJECT:{value:3,name:"OBJECT"},BOOL:{value:5,name:"BOOL"},CONST:{value:6,name:"CONST"},CATEGORY:{value:7,name:"CATEGORY"},DATETIME:{value:8,name:"DATETIME"},EMPTY:{value:101,name:"NULL"}},DataTypeConverter.SUBTYPES={PERCENTAGE:{value:1e3,name:"PERCENTAGE"},LATITUDE:{value:1001,name:"LATITUDE"},LONGITUDE:{value:1002,name:"LONGITUDE"}},DataTypeConverter.prototype=function(){var _analyseDataTypes=function(fields){ArrayUtils.IteratorOverKeys(fields,function(field){if(field._inferredTypes[DataTypeConverter.TYPES.CODE.name]){var confidence=field._inferredTypes[DataTypeConverter.TYPES.CODE.name]/field.numOfItems,_numericalInferredType=field._inferredTypes[DataTypeConverter.TYPES.NUMBER.name];return"undefined"!=typeof _numericalInferredType&&(confidence+=_numericalInferredType/field.numOfItems),field.type=DataTypeConverter.TYPES.CODE.name,void(field.typeConfidence=confidence)}var max=ArrayUtils.FindMinMax(field._inferredTypes,function(curval,lastval){return curval>lastval}),tkey=max.first.key;tkey===DataTypeConverter.TYPES.EMPTY.name&&null!=max.second&&"undefined"!=typeof max.second&&(tkey=max.second.key),field.type=tkey,field.typeConfidence=field._inferredTypes[max.first.key]/field.numOfItems})},_processInferType=function(value){if(null===value||"undefined"==typeof value)return DataTypeConverter.TYPES.EMPTY;if("object"==typeof value)return DataTypeConverter.TYPES.OBJECT;if(/^0[0-9]+$/.test(value))return DataTypeConverter.TYPES.CODE;var isnumber=DataTypesUtils.FilterFloat(value);if(isNaN(isnumber)!==!0)return DataTypeConverter.TYPES.NUMBER;var _date=DataTypesUtils.FilterDateTime(value);return 0==isNaN(_date)&&null!=_date?DataTypeConverter.TYPES.DATETIME:DataTypeConverter.TYPES.TEXT},jsonTraverse=function(json,fieldKeys,callback){var stack=[],numOfRows=0;for(stack.push({item:json,fieldKeyIndex:0});stack.length>0;){var stackTask=stack.pop(),item=stackTask.item,fieldKeyIndex=stackTask.fieldKeyIndex,fieldKey=fieldKeys[fieldKeyIndex];if("*"!=fieldKey||0!=ArrayUtils.isArray(item))if("*"!=fieldKey||1!=ArrayUtils.isArray(item)){var jsonSubtree=item[fieldKey];if(Array.isArray(jsonSubtree))for(var j=0;j<jsonSubtree.length;j++){var jsonItem=jsonSubtree[j];stack.push({item:jsonItem,fieldKeyIndex:fieldKeyIndex+1})}else stack.push({item:jsonSubtree,fieldKeyIndex:fieldKeyIndex+1})}else for(var cell,j=0;j<item.length&&(cell=item[j]);j++)stack.push({item:cell,fieldKeyIndex:fieldKeyIndex}),numOfRows++;else{var sProcessedKeys=fieldKeys.slice(0,fieldKeyIndex).toString();ArrayUtils.IteratorOverKeys(item,function(value,key){var curKey=sProcessedKeys+(sProcessedKeys.length>0?",":"")+key,_value=callback(value,key,curKey,numOfRows);item[key]=_value}),numOfRows++}}};return{constructor:DataTypeConverter,cast:function(metadata,options){return"undefined"!=typeof options&&null!=options||(options={castThresholdConfidence:1,castIfNull:!1}),this.convert(metadata,options)},convert:function(metadata,options){var lastRowIndex=0,numOfRows=0,numOfValues=0,datasetErrors=0,datasetMissingValues=0;return"undefined"!=typeof options&&null!=options||(options={castThresholdConfidence:1,castIfNull:!1}),jsonTraverse(metadata.dataset,metadata.fieldKeys,function(value,key,traversedKeys,rowIndex){var inferredType=metadata.types[traversedKeys];numOfValues++,lastRowIndex!=rowIndex&&(lastRowIndex=rowIndex,numOfRows++),null==value||"undefined"==typeof value||0==(value+"").length;var isCast=inferredType.typeConfidence>=options.castThresholdConfidence;if(inferredType.type==DataTypeConverter.TYPES.NUMBER.name&&isCast){var number=parseFloat(value);return isNaN(number)?(datasetErrors++,value):number}return value}),metadata.qualityIndex.notNullValues=(numOfValues-datasetMissingValues)/numOfValues,metadata.qualityIndex.errors=(numOfValues-datasetErrors)/numOfValues,metadata},inferJsonDataType:function(json,fieldKeys){var stack=[],fieldsType={},numOfRows=0;if("undefined"==typeof fieldKeys)throw"IllegalArgumentException: undefined json path to analyse.";for(stack.push({item:json,fieldKeyIndex:0});stack.length>0;){var stackTask=stack.pop(),item=stackTask.item,fieldKeyIndex=stackTask.fieldKeyIndex,fieldKey=fieldKeys[fieldKeyIndex];if("*"!=fieldKey||0!=ArrayUtils.isArray(item))if("*"==fieldKey&&ArrayUtils.isArray(item))for(var cell,j=0;j<item.length&&(cell=item[j]);j++)stack.push({item:cell,fieldKeyIndex:fieldKeyIndex}),numOfRows++;else{var jsonSubtree=item[fieldKey];if(Array.isArray(jsonSubtree))for(var j=0;j<jsonSubtree.length;j++){var jsonItem=jsonSubtree[j];stack.push({item:jsonItem,fieldKeyIndex:fieldKeyIndex+1})}else stack.push({item:jsonSubtree,fieldKeyIndex:fieldKeyIndex+1})}else{var sProcessedKeys=fieldKeys.slice(0,fieldKeyIndex).toString();ArrayUtils.IteratorOverKeys(item,function(item,key){var inferredType=_processInferType(item),curKey=sProcessedKeys+(0==sProcessedKeys.length?"":",")+key,fieldType=ArrayUtils.TestAndInitializeKey(fieldsType,curKey,{name:curKey,_inferredTypes:[],_inferredValues:[],numOfItems:0});fieldType.numOfItems++,ArrayUtils.TestAndIncrement(fieldType._inferredTypes,inferredType.name),inferredType===DataTypeConverter.TYPES.TEXT&&ArrayUtils.TestAndIncrement(fieldType._inferredValues,item)}),numOfRows++}}_analyseDataTypes(fieldsType);var quality={homogeneity:1,completeness:1,totalNullValues:0,totalValues:0};ArrayUtils.IteratorOverKeys(fieldsType,function(fieldType){quality.totalValues+=fieldType.numOfItems,quality.homogeneity*=fieldType.typeConfidence,fieldType.totalNullValues=0,fieldType._inferredTypes.hasOwnProperty(DataTypeConverter.TYPES.EMPTY.name)&&(fieldType.totalNullValues=fieldType._inferredTypes[DataTypeConverter.TYPES.EMPTY.name],quality.totalNullValues+=fieldType.totalNullValues)}),quality.homogeneity=Math.round(100*quality.homogeneity)/100;var totFullValues=quality.totalValues-quality.totalNullValues;quality.completeness=Math.round(totFullValues/quality.totalValues*100)/100;var warningsTextual="";return ArrayUtils.IteratorOverKeys(fieldsType,function(fieldType){fieldType.errorsDescription="";var description="";if(fieldType.typeConfidence<1){var incorrect=fieldType.numOfItems-fieldType.totalNullValues-fieldType._inferredTypes[fieldType.type];if(incorrect>0){description+="The column <"+fieldType.name+"> has the type <"+fieldType.type+">";var verb=1==incorrect?" value is":" values are";description+=", but "+incorrect+verb+" not a "+fieldType.type}}fieldType.totalNullValues>0&&(description+="The column <"+fieldType.name+"> has "+fieldType.totalNullValues+" EMPTY value",fieldType.totalNullValues>1&&(description+="s")),description.length>0&&(description+="."),fieldType.errorsDescription=description,warningsTextual+=description}),{dataset:json,fieldKeys:fieldKeys,types:fieldsType,qualityIndex:quality,warningsTextual:warningsTextual}},inferDataTypeOfValue:function(value){return _processInferType(value)}}}(),DataTypesUtils.FilterTime=function(value){var expTime=/^[0-9]{2}:[0-9]{2}(:[0-9]{2})?(\+[0-9]{2}:[0-9]{2})?$/;if(0==expTime.test(value))return null;var splitted=value.split(/[:|\+]/),expNumber=/^[0-9]{2}$/,HH=expNumber.test(splitted[0])?parseInt(splitted[0]):0,MM=expNumber.test(splitted[1])?parseInt(splitted[1]):0,SS=splitted.length>=3&&expNumber.test(splitted[2])?parseInt(splitted[2]):0,dt=new Date;return dt.setHours(HH),dt.setMinutes(MM),dt.setSeconds(SS),dt},DataTypesUtils.FilterDateTime=function(value){var _dtSplitted=value.split(/[T|\s]/);if(2==_dtSplitted.length){var dtTime=DataTypesUtils.FilterTime(_dtSplitted[1]);if(null==dtTime)return null;var dtDateTime=DataTypesUtils.FilterDate(_dtSplitted[0],dtTime);return dtDateTime}var dtDate=DataTypesUtils.FilterDate(value);if(null!=dtDate)return dtDate;var dtTime=DataTypesUtils.FilterTime(value);return dtTime},DataTypesUtils.FilterDate=function(value,dtDate){if(null==dtDate&&(dtDate=new Date),/^[0-9][0-9][0-9][0-9]\-[0-9][0-9]$/.test(value)){var year=parseInt(value.substring(0,4)),month=parseInt(value.substring(5));return dtDate.setYear(year),dtDate.setMonth(month),dtDate}var expDate=/^[0-9]{4}(\-|\/)[0-9]{2}((\-|\/)[0-9]{2})?$/;if(expDate.test(value)){var splitted=value.split(/[\-|\/]/),year=parseInt(splitted[0]),month=parseInt(splitted[1]),day=3==splitted.length?parseInt(splitted[2]):0;return dtDate.setYear(year),dtDate.setMonth(month),dtDate.setDate(day),dtDate}if(expDate=/^[0-9]{2}(\-|\/)[0-9]{2}(\-|\/)[0-9]{4}$/,expDate.test(value)){var splitted=value.split(/[\-|\/]/),year=parseInt(splitted[2]),month=parseInt(splitted[1]),day=parseInt(splitted[0]);return dtDate.setYear(year),dtDate.setMonth(month),dtDate.setDate(day),dtDate}return null},DataTypesUtils.FilterFloat=function(value){return/^(\-|\+)?((0|([1-9][0-9]*))(\.[0-9]+)?|Infinity)$/.test(value)?Number(value):NaN},DataTypesUtils.DecimalPlaces=function(num){var match=(""+num).match(/(?:\.(\d+))?(?:[eE]([+-]?\d+))?$/);return match?Math.max(0,(match[1]?match[1].length:0)-(match[2]?+match[2]:0)):0};