Commit 58596c77 authored by Adam Rousell
Browse files

Added the ability to use a URL as well as a file for data upload

parent edf9a7dd
......@@ -23,139 +23,141 @@
exports.clean = (data, rule) => {
let cleanedData = JSON.parse(JSON.stringify(data));
// Romve the features
cleanedData['features'] = [];
let fc = data.features.length;
// analyse the rule to determine what to do
if(rule.type === 'attribute') {
// cleaning based on attribute values
// Only operators allowed are = and !=
// are we working with presence of attibute or looking for a particular value?
if(rule.value === '*') {
// just looking for the attribute presence
data.features.forEach(feature => {
// are we including or excluding
if(rule.operator === 'eq') {
if(feature.properties.hasOwnProperty(rule.field)) {
cleanedData['features'].push(feature);
}
} else {
if(!feature.properties.hasOwnProperty(rule.field)) {
cleanedData['features'].push(feature);
}
}
});
} else {
// looking for records with particular attribute values
try {
// Romve the features
let fc = data.features.length;
cleanedData['features'] = [];
// analyse the rule to determine what to do
if(rule.type === 'attribute') {
// cleaning based on attribute values
// Only operators allowed are = and !=
// are we working with presence of attibute or looking for a particular value?
if(rule.value === '*') {
// just looking for the attribute presence
data.features.forEach(feature => {
// are we including or excluding
if(rule.operator === 'eq') {
if(feature.properties.hasOwnProperty(rule.field)) {
cleanedData['features'].push(feature);
}
} else {
if(!feature.properties.hasOwnProperty(rule.field)) {
cleanedData['features'].push(feature);
}
}
});
} else {
// looking for records with particular attribute values
data.features.forEach(feature => {
// are we including or excluding
if(rule.operator === 'eq') {
if(feature.properties.hasOwnProperty(rule.field) && feature.properties[rule.field] === rule.value) {
cleanedData['features'].push(feature);
}
} else {
if(!feature.properties.hasOwnProperty(rule.field) || !feature.properties[rule.field] === rule.value) {
cleanedData['features'].push(feature);
}
}
});
}
}
if(rule.type === 'age') {
// clean based on the age of the data
let cDate = Date.parse(rule.value);
console.log(cDate);
data.features.forEach(feature => {
// are we including or excluding
if(rule.operator === 'eq') {
if(feature.properties.hasOwnProperty(rule.field) && feature.properties[rule.field] === rule.value) {
cleanedData['features'].push(feature);
}
} else {
if(!feature.properties.hasOwnProperty(rule.field) || !feature.properties[rule.field] === rule.value) {
cleanedData['features'].push(feature);
}
}
});
// check which operator
let fDate = Date.parse(feature.properties[rule.field]);
switch(rule.operator) {
case 'eq':
// is the date equal
if(Math.round(cDate / 1000) === Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
case 'neq':
// is the date equal
if(Math.round(cDate / 1000) !== Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
case 'gt':
// is the date younger
if(Math.round(cDate / 1000) > Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
case 'lt':
// is the date older
if(Math.round(cDate / 1000) < Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
case 'gte':
if(Math.round(cDate / 1000) >= Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
case 'lte':
if(Math.round(cDate / 1000) <= Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
}
});
}
}
if(rule.type === 'age') {
// clean based on the age of the data
let cDate = Date.parse(rule.value);
console.log(cDate);
data.features.forEach(feature => {
// check which operator
let fDate = Date.parse(feature.properties[rule.field]);
switch(rule.operator) {
case 'eq':
// is the date equal
if(Math.round(cDate / 1000) === Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
case 'neq':
// is the date equal
if(Math.round(cDate / 1000) !== Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
case 'gt':
// is the date younger
if(Math.round(cDate / 1000) > Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
case 'lt':
// is the date older
if(Math.round(cDate / 1000) < Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
case 'gte':
if(Math.round(cDate / 1000) >= Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
case 'lte':
if(Math.round(cDate / 1000) <= Math.round(fDate / 1000)) {
cleanedData['features'].push(feature);
}
break;
}
});
}
if(rule.type === 'count') {
// clean based on the number of attributes
let v = parseInt(rule.value);
data.features.forEach(feature => {
// count the number of elements in properties
let metacnt = 0;
Object.keys(feature).forEach(key => {
if(key !== 'properties' && key !== 'geometry')
metacnt = metacnt +1;
if(rule.type === 'count') {
// clean based on the number of attributes
let v = parseInt(rule.value);
data.features.forEach(feature => {
// count the number of elements in properties
let metacnt = 0;
Object.keys(feature).forEach(key => {
if(key !== 'properties' && key !== 'geometry')
metacnt = metacnt +1;
});
let cnt = metacnt + Object.keys(feature['properties']).length;
switch(rule.operator) {
case 'eq':
// is the date equal
if(v === cnt) {
cleanedData['features'].push(feature);
}
break;
case 'neq':
// is the date younger
if(v !== cnt) {
cleanedData['features'].push(feature);
}
break;
case 'lt':
// is the date older
if(cnt < v) {
cleanedData['features'].push(feature);
}
break;
case 'gt':
// is the date older
if(cnt > v) {
console.log(cnt + ', ' + v );
cleanedData['features'].push(feature);
}
break;
case 'gte':
if(cnt >= v) {
cleanedData['features'].push(feature);
}
break;
case 'lte':
if(cnt <= v) {
cleanedData['features'].push(feature);
}
break;
}
});
let cnt = metacnt + Object.keys(feature['properties']).length;
switch(rule.operator) {
case 'eq':
// is the date equal
if(v === cnt) {
cleanedData['features'].push(feature);
}
break;
case 'neq':
// is the date younger
if(v !== cnt) {
cleanedData['features'].push(feature);
}
break;
case 'lt':
// is the date older
if(cnt < v) {
cleanedData['features'].push(feature);
}
break;
case 'gt':
// is the date older
if(cnt > v) {
console.log(cnt + ', ' + v );
cleanedData['features'].push(feature);
}
break;
case 'gte':
if(cnt >= v) {
cleanedData['features'].push(feature);
}
break;
case 'lte':
if(cnt <= v) {
cleanedData['features'].push(feature);
}
break;
}
});
}
}
} catch (error) { }
return cleanedData;
};
......@@ -65,4 +65,26 @@
.no-data {
width: 100%;
}
/* Framed white panel that wraps an upload form */
.upload-form {
    /* layout */
    clear: both;
    overflow: auto;
    margin: 5px;
    padding: 15px;
    /* frame and colours */
    border: 2px solid #555;
    color: #000;
    background-color: #fff;
}
/* The heading has an onClick handler in the template, so show it as clickable */
#cleanDataHead {
cursor: pointer;
}
/* Leave a gap below the instructions text, which precedes the form in the template */
#cleanDataInstructions {
margin-bottom: 20px;
}
/* Hidden on page load; the heading's onClick calls toggleInfo("cleanDataContent"),
   which presumably reveals this container (toggleInfo is defined elsewhere) */
#cleanDataContent {
display: none;
}
\ No newline at end of file
......@@ -11,7 +11,7 @@ let submitForm = () => {
$('#operatorRequired').css('display', 'none');
$('#valueRequired').css('display', 'none');
if($('#jsonfile').prop('files').length == 0 ) {
if($('#jsonfile').prop('files').length == 0 && $('#dataUrl').val() === '') {
// No file has been selected
$('#fileRequired').css('display', 'block');
submit = false;
......
......@@ -3,7 +3,7 @@
let submitForm = () => {
// Method for verifying that data has been entered prior to submitting
let fileBox = $('#jsonfile');
if(fileBox.prop('files').length > 0 ) {
if(fileBox.prop('files').length > 0 || $('#dataUrl').val() !== '') {
$('#uploadForm').submit();
} else {
// No file has been selected
......
......@@ -11,6 +11,8 @@ var langs = {
},
analyse: {
pageTitle: 'WeGovNow Data Check - Analyse',
cleanDataHead: 'Dataset Cleaning',
cleanDataInstructions: 'Using the following fields you can selectively remove data from the dataset. This functionality is limited to GeoJSON datasets.',
cleanFileLabel: 'File (JSON): (*)',
analyseCleanTypeLabel: 'Cleaning method (*)',
cleanType: {
......@@ -50,6 +52,8 @@ var langs = {
},
analyse: {
pageTitle: 'WeGovNow Data Check - Analysieren',
cleanDataHead: 'Dataset Reinigung',
cleanDataInstructions: 'Mit den folgenden Feldern können Sie selektiv Daten aus dem Dataset entfernen. Diese Funktionalität ist auf GeoJSON-Datasets beschränkt.',
cleanFileLabel: 'Datei (JSON): (*)',
analyseCleanTypeLabel: 'Reinigungsmethode (*)',
cleanType: {
......@@ -89,6 +93,8 @@ var langs = {
},
analyse: {
pageTitle: 'WeGovNow Data Check - Analizza',
cleanDataHead: 'Pulizia Dataset',
cleanDataInstructions: 'Utilizzando i seguenti campi è possibile eliminare selettivamente i dati dal set di dati. Questa funzionalità è limitata ai set di dati GeoJSON.',
cleanFileLabel: 'File (JSON): (*)',
analyseCleanTypeLabel: 'Metodo di pulizia (*)',
cleanType: {
......
......@@ -8,28 +8,69 @@ var cleanData = require('../modules/clean-data');
var upload = multer();
var router = express.Router();
/**
 * POST / - analyse a JSON dataset and render the `analyse` view.
 *
 * The dataset is taken from the uploaded `jsonfile` when present, otherwise
 * it is downloaded from the `dataUrl` form field (http:// or https:// only).
 * The whole response body is collected before it is parsed, so payloads that
 * arrive in several chunks are handled and the view is rendered exactly once.
 *
 * Any failure (no input, unsupported URL, download error, non-2xx response,
 * invalid JSON, analysis error) is forwarded to `next` as an error.
 *
 * NOTE(security): the server fetches a caller-supplied URL. Consider
 * restricting the allowed hosts and the maximum download size.
 */
router.post('/', upload.single('jsonfile'), function(req, res, next) {
    // Parse the raw JSON bytes, run the analysis and render the result
    var analyseAndRender = function(rawData) {
        var srcData;
        try {
            srcData = JSON.parse(rawData);
        } catch(err) {
            // Invalid JSON data
            console.log("invalid json");
            return next(err);
        }
        try {
            srcData['fileSize'] = rawData.byteLength;
            // collapse() gets its own deep copy of the parsed data
            var data = loaddata.collapse(JSON.parse(JSON.stringify(srcData)));
            var locale = req.query['locale'];//'de';
            var aResults = analysismethods.analyse(srcData, data, locale);
            res.render('analyse', { metrics: aResults });
        } catch(err) {
            // This may run inside a stream callback, where Express cannot
            // catch exceptions for us
            next(err);
        }
    };

    if(req.file) {
        return analyseAndRender(req.file.buffer);
    }

    var dataUrl = req.body ? req.body['dataUrl'] : undefined;
    if(typeof dataUrl !== 'string' || dataUrl === '') {
        console.log("No data");
        return next(new Error("No Data!"));
    }

    // Check if we are requesting a http or https
    var isHttps = /^https:\/\//i.test(dataUrl);
    if(!isHttps && !/^http:\/\//i.test(dataUrl)) {
        return next(new Error('Only http:// and https:// URLs are supported'));
    }
    var http = isHttps ? require('https') : require('http');

    // Make sure the request is answered only once, whichever event fires first
    var settled = false;
    var fail = function(err) {
        if(!settled) {
            settled = true;
            next(err);
        }
    };

    var request;
    try {
        request = http.get(dataUrl, function(response) {
            if(response.statusCode < 200 || response.statusCode >= 300) {
                // Discard the body so the socket is released
                response.resume();
                return fail(new Error('Could not download data: HTTP ' + response.statusCode));
            }
            var chunks = [];
            response.on('data', function(chunk) {
                chunks.push(chunk);
            });
            response.on('error', fail);
            response.on('end', function() {
                if(settled) {
                    return;
                }
                settled = true;
                analyseAndRender(Buffer.concat(chunks));
            });
        });
    } catch(err) {
        // http.get throws synchronously on a malformed URL
        return fail(err);
    }
    request.on('error', fail);
});
router.post('/clean', upload.single('jsonfile'), function(req, res, next) {
/*router.post('/clean', upload.single('jsonfile'), function(req, res, next) {
// make sure we have the data
if(!(req.file && req.body['cleanType'] && req.body['cleanField'] && req.body['cleanOperator'] && req.body['cleanValue'] )) {
......@@ -51,18 +92,6 @@ router.post('/clean', upload.single('jsonfile'), function(req, res, next) {
operator: req.body.cleanOperator,
value: req.body.cleanValue
};
/*var rule = {
type: 'age',
field: '@timestamp',
operator: '<',
value: '2017-01-24T12:08:08Z'
}
var rule = {
type: 'attribute',
field: 'amenity',
operator: '=',
value: 'hospital'
}*/
// Now clean the data based on rules
var data = cleanData.clean(srcData, rule);
......@@ -70,6 +99,74 @@ router.post('/clean', upload.single('jsonfile'), function(req, res, next) {
res.setHeader('Content-disposition', 'attachment; filename=cleaned.json');
res.setHeader('Content-type', 'application/json');
res.send(JSON.stringify(data, null, '\t'));
});*/
/**
 * POST /clean - apply one cleaning rule to a JSON dataset and return the
 * result as a downloadable `cleaned.json` attachment.
 *
 * The dataset is taken from the uploaded `jsonfile` when present, otherwise
 * it is downloaded from the `dataUrl` form field (http:// or https:// only).
 * The rule is built from the `cleanType`, `cleanField`, `cleanOperator` and
 * `cleanValue` form fields and passed to `cleanData.clean`.
 *
 * The whole response body is collected before it is parsed, so payloads that
 * arrive in several chunks are handled and the response is sent exactly once.
 * Any failure (no input, unsupported URL, download error, non-2xx response,
 * invalid JSON, cleaning error) is forwarded to `next` as an error.
 *
 * NOTE(security): the server fetches a caller-supplied URL. Consider
 * restricting the allowed hosts and the maximum download size.
 */
router.post('/clean', upload.single('jsonfile'), function(req, res, next) {
    // Parse the raw JSON bytes, clean them and send the result as a file
    var cleanAndSend = function(rawData) {
        var srcData;
        try {
            srcData = JSON.parse(rawData);
        } catch(err) {
            // Invalid JSON data
            console.log("invalid json");
            return next(err);
        }
        try {
            var rule = {
                type: req.body.cleanType,
                field: req.body.cleanField,
                operator: req.body.cleanOperator,
                value: req.body.cleanValue
            };
            // Now clean the data based on rules
            var data = cleanData.clean(srcData, rule);
            // return the cleaned data as a file
            res.setHeader('Content-disposition', 'attachment; filename=cleaned.json');
            res.setHeader('Content-type', 'application/json');
            res.send(JSON.stringify(data, null, '\t'));
        } catch(err) {
            // This may run inside a stream callback, where Express cannot
            // catch exceptions for us
            next(err);
        }
    };

    if(req.file) {
        return cleanAndSend(req.file.buffer);
    }

    var dataUrl = req.body ? req.body['dataUrl'] : undefined;
    if(typeof dataUrl !== 'string' || dataUrl === '') {
        console.log("No data");
        return next(new Error("No Data!"));
    }

    // Check if we are requesting a http or https
    var isHttps = /^https:\/\//i.test(dataUrl);
    if(!isHttps && !/^http:\/\//i.test(dataUrl)) {
        return next(new Error('Only http:// and https:// URLs are supported'));
    }
    var http = isHttps ? require('https') : require('http');

    // Make sure the request is answered only once, whichever event fires first
    var settled = false;
    var fail = function(err) {
        if(!settled) {
            settled = true;
            next(err);
        }
    };

    var request;
    try {
        request = http.get(dataUrl, function(response) {
            if(response.statusCode < 200 || response.statusCode >= 300) {
                // Discard the body so the socket is released
                response.resume();
                return fail(new Error('Could not download data: HTTP ' + response.statusCode));
            }
            var chunks = [];
            response.on('data', function(chunk) {
                chunks.push(chunk);
            });
            response.on('error', fail);
            response.on('end', function() {
                if(settled) {
                    return;
                }
                settled = true;
                cleanAndSend(Buffer.concat(chunks));
            });
        });
    } catch(err) {
        // http.get throws synchronously on a malformed URL
        return fail(err);
    }
    request.on('error', fail);
});
module.exports = router;
......@@ -57,48 +57,57 @@ block content
.header
.upload-form
form#uploadForm(method='POST' action='analyse/clean' enctype='multipart/form-data')
div.form-group
label#cleanFileLabel(for='jsonfile') File (JSON):
input(type='file', placeholder='File', id='jsonfile', name='jsonfile')
#fileRequired.required File is required!
div.form-group
div.clean-options
div.clean-group
label#analyseCleanTypeLabel(for='cleanType') Cleaning method
| <br />
select(id='cleanType' name='cleanType', onClick='updateType()')
option(value='choose') Select
option(value='age') Age
option(value='count') Attribute Count
option(value='attribute') Attribute Value
#methodRequired.required Cleaning method is required!
div.clean-group
label#analyseCleanFieldLabel(for='cleanField') Field
| <br />
input(type='text' id='cleanField' name='cleanField')
#fieldRequired.required Field is required!
div.clean-group
label#analyseCleanOperatorLabel(for='cleanOperator') Operator
| <br />
select(id='cleanOperator' name='cleanOperator')
option(value="eq") equals
option(value="neq") does not equal
option(value="gt") is greater than
option(value="gte") is greater than or equal to
option(value="lt") is less than
option(value="lte") is less than or equal to
#operatorRequired.required Operator is required!
div.clean-group
label#analyseCleanValueLabel(for='cleanValue') Value
| <br />
input(type='text' id='cleanValue' name='cleanValue')
#valueRequired.required Value is required (use * for all)!
div.form-button-group
button#analyseCleanButton.btn.btn-primary(type='button' onClick='submitForm()') Clean Data
#cleanDataHead.analysis-name(onClick='toggleInfo("cleanDataContent")') Dataset cleaning
#cleanDataContent
#cleanDataInstructions Using the following fields you can selectively remove data from the dataset. This functionality is limited to GeoJSON datasets.
form#uploadForm(method='POST' action='analyse/clean' enctype='multipart/form-data')
div.form-group
label#cleanFileLabel(for='jsonfile') File (JSON):
input(type='file', placeholder='File', id='jsonfile', name='jsonfile')
div.form-group
label#indexUrlLabel(for='dataUrl') URL:
input(type='text' id='dataUrl' name='dataUrl')
#fileRequired.required A file or URL is required!
div.form-group
div.clean-options
div.clean-group
label#analyseCleanTypeLabel(for='cleanType') Cleaning method
| <br />
select(id='cleanType' name='cleanType', onClick='updateType()')
option(value='choose') Select
option(value='age') Age
option(value='count') Attribute Count
option(value='attribute') Attribute Value
#methodRequired.required Cleaning method is required!
div.clean-group
label#analyseCleanFieldLabel(for='cleanField') Field
| <br />
input(type='text' id='cleanField' name='cleanField')
#fieldRequired.required Field is required!
div.clean-group
label#analyseCleanOperatorLabel(for='cleanOperator') Operator
| <br />
select(id='cleanOperator' name='cleanOperator')