Compare commits

...

5 Commits

5 changed files with 204 additions and 65 deletions

21
langs/en.js Normal file
View File

@ -0,0 +1,21 @@
module.exports = {
concept: {
create: '^conceptualise an? ~ ([a-zA-Z0-9 ]*) ~ ([A-Z0-9]+)(?: that)?',
edit: '^conceptualise the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) (?:has|is|~)',
parseValue: 'has the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) as ~ ([a-zA-Z0-9 ]*) ~',
parseParent: '^is an? ([a-zA-Z0-9 ]*)',
parseRel: '~ ([a-zA-Z0-9 ]*) ~ the ([a-zA-Z0-9 ]*) ([A-Z0-9]+)',
parseSyn: '~ is expressed by ~ ([a-zA-Z0-9 ]*)'
},
instance: {
create: '^there is an? ([a-zA-Z0-9 ]*) named ([a-zA-Z0-9_]+|\'[a-zA-Z0-9_ ]+\')(?: that)?',
edit: 'the ([a-zA-Z0-9_ ]+) ([a-zA-Z0-9_]+|\'[a-zA-Z0-9_ ]+\')',
parseRel: '(?!has)([a-zA-Z0-9 ]*) the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_\' ]*)',
parseRawVal: '^has ([a-zA-Z0-9]*|\'[^\'\]*(?:\\.[^\'\]*)*\') as ([a-zA-Z0-9 ]*)',
parseInstanceVal: 'has the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_]*|\'[a-zA-Z0-9_ ]*\') as ([a-zzA-Z0-9 ]*)',
parseInstanceSubConcept: '(?:is| )?an? ([a-zA-Z0-9 ]*)',
parseInstanceSynonym: 'is expressed by (\'[a-zA-Z0-9 ]*\'|[a-zA-Z0-9]*)'
},
and: 'and',
value: 'value'
};

3
langs/index.js Normal file
View File

@ -0,0 +1,3 @@
// Registry of the bundled language packs, keyed by language code.
const en = require('./en.js');

module.exports = { en: en };

View File

@ -16,6 +16,7 @@
*/ */
'use strict'; 'use strict';
const LanguageManager = require('./LanguageManager.js');
const CEAgent = require('./CEAgent.js'); const CEAgent = require('./CEAgent.js');
const CEParser = require('./CEParser.js'); const CEParser = require('./CEParser.js');
const QuestionParser = require('./QuestionParser.js'); const QuestionParser = require('./QuestionParser.js');
@ -118,6 +119,7 @@ class CENode {
const descendants = concept.descendants.concat(concept); const descendants = concept.descendants.concat(concept);
const childrenIds = []; const childrenIds = [];
for (const descendant of descendants) { childrenIds.push(descendant.id); } for (const descendant of descendants) { childrenIds.push(descendant.id); }
for (const ancestor of concept.ancestors) { childrenIds.push(ancestor.id); }
for (const instance of this.instances) { for (const instance of this.instances) {
if (instance && childrenIds.indexOf(instance.concept.id) > -1) { if (instance && childrenIds.indexOf(instance.concept.id) > -1) {
instanceList.push(instance); instanceList.push(instance);
@ -245,6 +247,7 @@ class CENode {
* sentence sets to be processed. * sentence sets to be processed.
*/ */
constructor(...models) { constructor(...models) {
this.languageManager = new LanguageManager(this);
this.ceParser = new CEParser(this); this.ceParser = new CEParser(this);
this.questionParser = new QuestionParser(this); this.questionParser = new QuestionParser(this);
this.nlParser = new NLParser(this); this.nlParser = new NLParser(this);

View File

@ -18,6 +18,7 @@
const CEConcept = require('./CEConcept.js'); const CEConcept = require('./CEConcept.js');
const CEInstance = require('./CEInstance.js'); const CEInstance = require('./CEInstance.js');
const en = require('../langs/en.js');
const quotes = { const quotes = {
escape(string) { escape(string) {
@ -28,6 +29,15 @@ const quotes = {
}, },
}; };
const newConcept = new RegExp(en.concept.create, 'i');
const editConcept = new RegExp(en.concept.edit);
const newInstance = new RegExp(en.instance.create);
const editInstance = new RegExp(en.instance.edit);
const andRegex = new RegExp('\\b' + en.and + '\\b', 'gi');
const and = en.and;
const value = en.value;
class CEParser { class CEParser {
/* /*
@ -43,20 +53,21 @@ class CEParser {
*/ */
parse(input, source) { parse(input, source) {
const t = input.replace(/\s+/g, ' ').replace(/\.+$/, '').trim(); // Whitespace -> single space const t = input.replace(/\s+/g, ' ').replace(/\.+$/, '').trim(); // Whitespace -> single space
if (t.match(/^conceptualise an?/i)) {
if (newConcept.test(t)){
return this.newConcept(t, source); return this.newConcept(t, source);
} else if (t.match(/^conceptualise the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) (?:has|is|~)/i)) { } else if (editConcept.test(t)) {
return this.modifyConcept(t, source); return this.modifyConcept(t, source);
} else if (t.match(/^there is an? ([a-zA-Z0-9 ]*) named/i)) { } else if (newInstance.test(t)) {
return this.newInstance(t, source); return this.newInstance(t, source);
} else if (t.match(/^the ([a-zA-Z0-9 ]*)/i)) { } else if (editInstance.test(t)) {
return this.modifyInstance(t, source); return this.modifyInstance(t, source);
} }
return [false, null]; return [false, null];
} }
newConcept(t, source) { newConcept(t, source) {
const match = t.match(/^conceptualise an? ~ ([a-zA-Z0-9 ]*) ~ ([A-Z0-9]+)/i); const match = newConcept.exec(t);
const conceptName = match[1]; const conceptName = match[1];
const storedConcept = this.node.getConceptByName(conceptName); const storedConcept = this.node.getConceptByName(conceptName);
let concept = null; let concept = null;
@ -65,16 +76,18 @@ class CEParser {
} }
concept = new CEConcept(this.node, conceptName, source); concept = new CEConcept(this.node, conceptName, source);
const remainder = t.replace(/^conceptualise an? ~ ([a-zA-Z0-9 ]*) ~ ([A-Z0-9]+) that/, ''); const remainder = t.replace(newConcept, '');
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g); const facts = remainder.replace(andRegex, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
if (facts){
for (const fact of facts) { for (const fact of facts) {
this.processConceptFact(concept, fact, source); this.processConceptFact(concept, fact, source);
} }
}
return [true, t, concept]; return [true, t, concept];
} }
modifyConcept(t, source) { modifyConcept(t, source) {
const conceptInfo = t.match(/^conceptualise the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) (?:has|is|~)/); const conceptInfo = editConcept.exec(t);
if (!conceptInfo) { if (!conceptInfo) {
return [false, 'Unable to parse sentence']; return [false, 'Unable to parse sentence'];
} }
@ -87,7 +100,7 @@ class CEParser {
const remainderRegex = new RegExp(`^conceptualise the ${conceptName} ${conceptVar}`, 'i'); const remainderRegex = new RegExp(`^conceptualise the ${conceptName} ${conceptVar}`, 'i');
const remainder = t.replace(remainderRegex, ''); const remainder = t.replace(remainderRegex, '');
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g); const facts = remainder.replace(andRegex, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
for (const fact of facts) { for (const fact of facts) {
this.processConceptFact(concept, fact, source); this.processConceptFact(concept, fact, source);
} }
@ -95,27 +108,29 @@ class CEParser {
} }
processConceptFact(concept, fact, source) { processConceptFact(concept, fact, source) {
const input = fact.trim().replace(/\+/g, 'and'); const parseVal = new RegExp(en.concept.parseValue);
if (input.match(/has the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) as ~ ([a-zA-Z0-9 ]*) ~/g)) { const parsePar = new RegExp(en.concept.parseParent);
const re = /has the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) as ~ ([a-zA-Z0-9 ]*) ~/g; const parseRel = new RegExp(en.concept.parseRel);
const match = re.exec(input); const parseSyn = new RegExp(en.concept.parseSyn);
const input = fact.trim().replace(/\+/g, and);
if (parseVal.test(input)){
const match = parseVal.exec(input);
const valConceptName = match[1]; const valConceptName = match[1];
const label = match[3]; const label = match[3];
const valConcept = valConceptName === 'value' ? 0 : this.node.getConceptByName(valConceptName); const valConcept = valConceptName === value ? 0 : this.node.getConceptByName(valConceptName);
concept.addValue(label, valConcept, source); concept.addValue(label, valConcept, source);
} }
if (input.match(/^is an? ([a-zA-Z0-9 ]*)/)) { if (parsePar.test(input)){
const re = /^is an? ([a-zA-Z0-9 ]*)/; const match = parsePar.exec(input);
const match = re.exec(input);
const parentConceptName = match[1]; const parentConceptName = match[1];
const parentConcept = this.node.getConceptByName(parentConceptName); const parentConcept = this.node.getConceptByName(parentConceptName);
if (parentConcept) { if (parentConcept) {
concept.addParent(parentConcept); concept.addParent(parentConcept);
} }
} }
if (input.match(/~ ([a-zA-Z0-9 ]*) ~ the ([a-zA-Z0-9 ]*) ([A-Z0-9]+)/)) { if (parseRel.test(input)){
const re = /~ ([a-zA-Z0-9 ]*) ~ the ([a-zA-Z0-9 ]*) ([A-Z0-9]+)/; const match = parseRel.exec(input);
const match = re.exec(input);
const label = match[1]; const label = match[1];
const relConceptName = match[2]; const relConceptName = match[2];
const relConcept = this.node.getConceptByName(relConceptName); const relConcept = this.node.getConceptByName(relConceptName);
@ -123,22 +138,17 @@ class CEParser {
concept.addRelationship(label, relConcept, source); concept.addRelationship(label, relConcept, source);
} }
} }
if (input.match(/~ is expressed by ~ ([a-zA-Z0-9 ]*)/)) { if (parseSyn.test(input)){
const re = /~ is expressed by ~ ([a-zA-Z0-9 ]*)/; const match = parseSyn.exec(input);
const match = re.exec(input);
const synonym = match[1]; const synonym = match[1];
concept.addSynonym(synonym); concept.addSynonym(synonym);
} }
} }
newInstance(t, source) { newInstance(t, source) {
let names = t.match(/^there is an? ([a-zA-Z0-9 ]*) named '([^'\\]*(?:\\.[^'\\]*)*)'/i); const names = newInstance.exec(t)
if (!names) {
names = t.match(/^there is an? ([a-zA-Z0-9 ]*) named ([a-zA-Z0-9_]*)/i);
if (!names) { return [false, 'Unable to determine name of instance.']; }
}
const conceptName = names[1]; const conceptName = names[1];
const instanceName = names[2].replace(/\\/g, ''); const instanceName = names[2].replace(/\\/g, '').replace(/'/g, '');
const concept = this.node.getConceptByName(conceptName); const concept = this.node.getConceptByName(conceptName);
const currentInstance = this.node.getInstanceByName(instanceName, concept); const currentInstance = this.node.getInstanceByName(instanceName, concept);
if (!concept) { if (!concept) {
@ -150,44 +160,55 @@ class CEParser {
const instance = new CEInstance(this.node, concept, instanceName, source); const instance = new CEInstance(this.node, concept, instanceName, source);
instance.sentences.push(t); instance.sentences.push(t);
const remainder = t.replace(/^there is an? (?:[a-zA-Z0-9 ]*) named (?:[a-zA-Z0-9_]*|'[a-zA-Z0-9_ ]*') that/, ''); const remainder = t.replace(newInstance, '');
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g); const facts = remainder.replace(andRegex, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
if (facts){
for (const fact of facts) { for (const fact of facts) {
this.processInstanceFact(instance, fact, source); this.processInstanceFact(instance, fact, source);
} }
}
return [true, t, instance]; return [true, t, instance];
} }
modifyInstance(t, source) { modifyInstance(t, source) {
let concept; let concept;
let instance; let instance;
let instanceName; const names = editInstance.exec(t);
if (t.match(/^the ([a-zA-Z0-9 ]*)/i)) {
const names = t.match(/^the ([a-zA-Z0-9 ]*)/i); concept = this.node.getConceptByName(names[1]);
if (concept){
instance = this.node.getInstanceByName(names[2].replace(/\\/g, '').replace(/'/g, ''));
}
else {
const nameTokens = names[1].split(' '); const nameTokens = names[1].split(' ');
for (const conceptCheck of this.node.concepts) { let currentName = '';
if (names[1].toLowerCase().indexOf(conceptCheck.name.toLowerCase()) === 0) { for (const index in nameTokens){
concept = conceptCheck; currentName += ' ' + nameTokens[index];
instanceName = nameTokens[concept.name.split(' ').length]; concept = this.node.getConceptByName(currentName.trim());
instance = this.node.getInstanceByName(instanceName, concept); if (concept){
break; break;
} }
} }
} if (concept){
if (!instance && t.match(/^the ([a-zA-Z0-9 ]*) '([^'\\]*(?:\\.[^'\\]*)*)'/i)) { const possibleInstances = this.node.getInstances(concept.name, true);
const names = t.match(/^the ([a-zA-Z0-9 ]*) '([^'\\]*(?:\\.[^'\\]*)*)'/i); let lowestIndex = null;
if (names) { for (const potential of possibleInstances){
concept = this.node.getConceptByName(names[1]); const check = new RegExp('\\b(' + potential.name + (potential.synonyms.length ? '|' + potential.synonyms.join('|') : '') + ')\\b', 'i');
instanceName = names[2].replace(/\\/g, ''); const match = check.exec(t);
instance = this.node.getInstanceByName(instanceName, concept); if (match && (lowestIndex === null || match.index < lowestIndex)){
lowestIndex = match.index;
instance = potential;
} }
} }
}
}
if (!concept || !instance) { if (!concept || !instance) {
return [false, `Unknown concept/instance combination in: ${t}`]; return [false, `Unknown concept/instance combination in: ${t}`];
} }
instance.sentences.push(t); instance.sentences.push(t);
const tokens = t.split(' '); const tokens = t.split(' ');
tokens.splice(0, 1 + concept.name.split(' ').length + instanceName.split(' ').length); tokens.splice(0, 1 + concept.name.split(' ').length + instance.name.split(' ').length);
const remainder = tokens.join(' '); const remainder = tokens.join(' ');
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g); const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
if (facts) { if (facts) {
@ -200,9 +221,14 @@ class CEParser {
processInstanceFact(instance, fact, source) { processInstanceFact(instance, fact, source) {
const input = fact.trim().replace(/\+/g, 'and'); const input = fact.trim().replace(/\+/g, 'and');
if (input.match(/^(?!has)([a-zA-Z0-9 ]*) the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_' ]*)/)) { const parseRel = new RegExp(en.instance.parseRel);
const re = /^(?!has)([a-zA-Z0-9 ]*) the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_' ]*)/; const parseRawVal = new RegExp(en.instance.parseRawVal);
const match = re.exec(input); const parseInstanceVal = new RegExp(en.instance.parseInstanceVal);
const parseInstanceSubConcept = new RegExp(en.instance.parseInstanceSubConcept);
const parseInstanceSynonym = new RegExp(en.instance.parseInstanceSynonym);
if (parseRel.test(input)){
const match = parseRel.exec(input);
const label = match[1]; const label = match[1];
const relConceptName = match[2]; const relConceptName = match[2];
const relInstanceName = match[3].replace(/'/g, ''); const relInstanceName = match[3].replace(/'/g, '');
@ -216,16 +242,14 @@ class CEParser {
instance.addRelationship(label, relInstance, true, source); instance.addRelationship(label, relInstance, true, source);
} }
} }
if (input.match(/^has ([a-zA-Z0-9]*|'[^'\\]*(?:\\.[^'\\]*)*') as ([a-zA-Z0-9 ]*)/)) { if (parseRawVal.test(input)){
const re = /^has ([a-zA-Z0-9]*|'[^'\\]*(?:\\.[^'\\]*)*') as ([a-zA-Z0-9 ]*)/; const match = parseRawVal.exec(input);
const match = re.exec(input);
const value = quotes.unescape(match[1]); const value = quotes.unescape(match[1]);
const label = match[2]; const label = match[2];
instance.addValue(label, value, true, source); instance.addValue(label, value, true, source);
} }
if (input.match(/^has the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_]*|'[a-zA-Z0-9_ ]*') as ([a-zA-Z0-9 ]*)/)) { if (parseInstanceVal.test(input)){
const re = /^has the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9]*|'[a-zA-Z0-9 ]*') as ([a-zA-Z0-9 ]*)/; const match = parseInstanceVal.exec(input);
const match = re.exec(input);
const valConceptName = match[1]; const valConceptName = match[1];
const valInstanceName = match[2].replace(/'/g, ''); const valInstanceName = match[2].replace(/'/g, '');
const label = match[3]; const label = match[3];
@ -238,13 +262,12 @@ class CEParser {
instance.addValue(label, valInstance, true, source); instance.addValue(label, valInstance, true, source);
} }
} }
if (input.match(/(?:is| )?an? ([a-zA-Z0-9 ]*)/g)) { if (parseInstanceSubConcept.test(input)){
const re = /(?:is| )?an? ([a-zA-Z0-9 ]*)/g; const match = parseInstanceSubConcept.exec(input);
const match = re.exec(input);
instance.addSubConcept(this.node.getConceptByName(match && match[1] && match[1].trim())); instance.addSubConcept(this.node.getConceptByName(match && match[1] && match[1].trim()));
} }
if (input.match(/is expressed by ('[a-zA-Z0-9 ]*'|[a-zA-Z0-9]*)/)) { if (parseInstanceSynonym.test(input)){
const match = input.match(/is expressed by ('[a-zA-Z0-9 ]*'|[a-zA-Z0-9]*)/); const match = parseInstanceSynonym.exec(input);
const synonym = match && match[1] && match[1].replace(/'/g, '').trim(); const synonym = match && match[1] && match[1].replace(/'/g, '').trim();
instance.addSynonym(synonym); instance.addSynonym(synonym);
} }

89
src/LanguageManager.js Normal file
View File

@ -0,0 +1,89 @@
/*
* Copyright 2017 W.M. Webberley & A.D. Preece (Cardiff University)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
'use strict';
const langs = require('../langs');
// Named regex fragments. LanguageManager#getExpression substitutes each one
// for its `<name>` token inside a language pattern.
const placeholders = {
  // Letters, digits and spaces; may be empty.
  conceptName: "([a-zA-Z0-9 ]*)",
  // One or more upper-case letters or digits.
  conceptVar: "([A-Z0-9]+)",
  // Either a bare word or a single-quoted name that may contain spaces.
  instanceName: "([a-zA-Z0-9_]+|'[a-zA-Z0-9_ ]+')",
  // Letters, digits and spaces; may be empty.
  relationshipLabel: "([a-zA-Z0-9 ]*)"
};
/**
 * Resolves sentence patterns for the currently selected language pack and
 * turns them into regular expressions.
 *
 * A language pack is a (possibly nested) object of regex source strings.
 * Patterns may contain `<name>` tokens, which are expanded using the
 * module-level `placeholders` table before the pattern is compiled.
 */
class LanguageManager {

  /**
   * Look up a nested value using a path such as `'instance.create'` or
   * `'a[0].b'`.
   *
   * @param {Object} o - Object to search.
   * @param {string} s - Dot/bracket separated path.
   * @returns {*} The value at the path, or `undefined` if any step is
   *   missing, is not an own property, or would descend into a primitive.
   */
  static getEntry(o, s) {
    if (typeof s !== 'string') {
      return undefined;
    }
    // Normalise `a[0].b` to `a.0.b` and drop any leading dot.
    const path = s.replace(/\[(\w+)\]/g, '.$1').replace(/^\./, '');
    let current = o;
    for (const key of path.split('.')) {
      const isContainer = current !== null &&
        (typeof current === 'object' || typeof current === 'function');
      // Own properties only: inherited members such as `constructor` are
      // never valid entries, and `in` would throw on a primitive.
      if (!isContainer || !Object.prototype.hasOwnProperty.call(current, key)) {
        return undefined;
      }
      current = current[key];
    }
    return current;
  }

  /**
   * Fetch the pattern stored under `key` in the current language and expand
   * every `<placeholder>` token it contains.
   *
   * @param {string} key - Path of the pattern within the language pack.
   * @returns {string|undefined} The expanded regex source, or `undefined`
   *   when the key does not resolve to a string pattern.
   */
  getExpression(key) {
    const entry = LanguageManager.getEntry(this.lang, key);
    if (typeof entry !== 'string') {
      return undefined;
    }
    let pattern = entry;
    for (const name of Object.keys(placeholders)) {
      // split/join replaces ALL occurrences and treats the replacement
      // literally (no `$` substitution patterns).
      pattern = pattern.split(`<${name}>`).join(placeholders[name]);
    }
    return pattern;
  }

  /**
   * Test whether `string` matches the pattern stored under `key`
   * (case-insensitively).
   *
   * @param {string} key - Path of the pattern within the language pack.
   * @param {string} string - Text to test.
   * @returns {boolean} `true` on a match; `false` on no match or unknown key.
   */
  is(key, string) {
    const expression = this.getExpression(key);
    // `new RegExp(undefined)` matches everything, so bail out explicitly.
    if (expression === undefined) {
      return false;
    }
    return new RegExp(expression, 'i').test(string);
  }

  /**
   * Execute the pattern stored under `key` against `string`
   * (case-insensitively).
   *
   * @param {string} key - Path of the pattern within the language pack.
   * @param {string} string - Text to parse.
   * @returns {Array|null} The `RegExp#exec` result, or `null` on no match or
   *   unknown key.
   */
  parse(key, string) {
    const expression = this.getExpression(key);
    if (expression === undefined) {
      return null;
    }
    return new RegExp(expression, 'i').exec(string);
  }

  /**
   * Placeholder for a future extraction API; intentionally a no-op that
   * returns `undefined`.
   */
  extract(key, string) {
  }

  /**
   * Register (or replace) a language pack in the shared `langs` registry.
   *
   * @param {string} key - Language code to register under.
   * @param {Object} language - The language pack.
   */
  addLanguage(key, language) {
    langs[key] = language;
  }

  /**
   * Switch to a registered language. Unknown keys are ignored and the
   * current language stays selected.
   *
   * @param {string} key - Language code to activate.
   */
  setLanguage(key) {
    // Own-property check so inherited names like 'constructor' are rejected.
    if (Object.prototype.hasOwnProperty.call(langs, key)) {
      this.lang = langs[key];
    }
  }

  /**
   * @param {Object} node - Owning node; stored on `this.node`.
   *   The language defaults to the `en` pack.
   */
  constructor(node) {
    this.node = node;
    this.lang = langs['en'];
  }
}
module.exports = LanguageManager;