Compare commits
5 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
861c10d544 | ||
|
766be35907 | ||
|
aa74554593 | ||
|
503360d605 | ||
|
e1b5e48e4b |
21
langs/en.js
Normal file
21
langs/en.js
Normal file
@ -0,0 +1,21 @@
|
||||
module.exports = {
|
||||
concept: {
|
||||
create: '^conceptualise an? ~ ([a-zA-Z0-9 ]*) ~ ([A-Z0-9]+)(?: that)?',
|
||||
edit: '^conceptualise the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) (?:has|is|~)',
|
||||
parseValue: 'has the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) as ~ ([a-zA-Z0-9 ]*) ~',
|
||||
parseParent: '^is an? ([a-zA-Z0-9 ]*)',
|
||||
parseRel: '~ ([a-zA-Z0-9 ]*) ~ the ([a-zA-Z0-9 ]*) ([A-Z0-9]+)',
|
||||
parseSyn: '~ is expressed by ~ ([a-zA-Z0-9 ]*)'
|
||||
},
|
||||
instance: {
|
||||
create: '^there is an? ([a-zA-Z0-9 ]*) named ([a-zA-Z0-9_]+|\'[a-zA-Z0-9_ ]+\')(?: that)?',
|
||||
edit: 'the ([a-zA-Z0-9_ ]+) ([a-zA-Z0-9_]+|\'[a-zA-Z0-9_ ]+\')',
|
||||
parseRel: '(?!has)([a-zA-Z0-9 ]*) the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_\' ]*)',
|
||||
parseRawVal: '^has ([a-zA-Z0-9]*|\'[^\'\]*(?:\\.[^\'\]*)*\') as ([a-zA-Z0-9 ]*)',
|
||||
parseInstanceVal: 'has the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_]*|\'[a-zA-Z0-9_ ]*\') as ([a-zzA-Z0-9 ]*)',
|
||||
parseInstanceSubConcept: '(?:is| )?an? ([a-zA-Z0-9 ]*)',
|
||||
parseInstanceSynonym: 'is expressed by (\'[a-zA-Z0-9 ]*\'|[a-zA-Z0-9]*)'
|
||||
},
|
||||
and: 'and',
|
||||
value: 'value'
|
||||
};
|
3
langs/index.js
Normal file
3
langs/index.js
Normal file
@ -0,0 +1,3 @@
|
||||
// Registry of the bundled language packs, keyed by language code.
const en = require('./en.js');

module.exports = {en};
|
@ -16,6 +16,7 @@
|
||||
*/
|
||||
'use strict';
|
||||
|
||||
const LanguageManager = require('./LanguageManager.js');
|
||||
const CEAgent = require('./CEAgent.js');
|
||||
const CEParser = require('./CEParser.js');
|
||||
const QuestionParser = require('./QuestionParser.js');
|
||||
@ -118,6 +119,7 @@ class CENode {
|
||||
const descendants = concept.descendants.concat(concept);
|
||||
const childrenIds = [];
|
||||
for (const descendant of descendants) { childrenIds.push(descendant.id); }
|
||||
for (const ancestor of concept.ancestors) { childrenIds.push(ancestor.id); }
|
||||
for (const instance of this.instances) {
|
||||
if (instance && childrenIds.indexOf(instance.concept.id) > -1) {
|
||||
instanceList.push(instance);
|
||||
@ -245,6 +247,7 @@ class CENode {
|
||||
* sentence sets to be processed.
|
||||
*/
|
||||
constructor(...models) {
|
||||
this.languageManager = new LanguageManager(this);
|
||||
this.ceParser = new CEParser(this);
|
||||
this.questionParser = new QuestionParser(this);
|
||||
this.nlParser = new NLParser(this);
|
||||
|
153
src/CEParser.js
153
src/CEParser.js
@ -18,6 +18,7 @@
|
||||
|
||||
const CEConcept = require('./CEConcept.js');
|
||||
const CEInstance = require('./CEInstance.js');
|
||||
const en = require('../langs/en.js');
|
||||
|
||||
const quotes = {
|
||||
escape(string) {
|
||||
@ -28,6 +29,15 @@ const quotes = {
|
||||
},
|
||||
};
|
||||
|
||||
// Sentence-level matchers, compiled once from the language pack.
// All four are case-insensitive so that a sentence with a capitalised
// first word ("There is a ...", "The ...") is still recognised, and so that
// the concept/instance matchers agree with each other.
const newConcept = new RegExp(en.concept.create, 'i');
const editConcept = new RegExp(en.concept.edit, 'i');
const newInstance = new RegExp(en.instance.create, 'i');
const editInstance = new RegExp(en.instance.edit, 'i');

// Matches the conjunction as a whole word. Only used with String#replace,
// so the stateful lastIndex of a global regex is not a concern here.
const andRegex = new RegExp('\\b' + en.and + '\\b', 'gi');
const and = en.and;
const value = en.value;
|
||||
|
||||
class CEParser {
|
||||
|
||||
/*
|
||||
@ -43,20 +53,21 @@ class CEParser {
|
||||
*/
|
||||
parse(input, source) {
|
||||
const t = input.replace(/\s+/g, ' ').replace(/\.+$/, '').trim(); // Whitespace -> single space
|
||||
if (t.match(/^conceptualise an?/i)) {
|
||||
|
||||
if (newConcept.test(t)){
|
||||
return this.newConcept(t, source);
|
||||
} else if (t.match(/^conceptualise the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) (?:has|is|~)/i)) {
|
||||
} else if (editConcept.test(t)) {
|
||||
return this.modifyConcept(t, source);
|
||||
} else if (t.match(/^there is an? ([a-zA-Z0-9 ]*) named/i)) {
|
||||
} else if (newInstance.test(t)) {
|
||||
return this.newInstance(t, source);
|
||||
} else if (t.match(/^the ([a-zA-Z0-9 ]*)/i)) {
|
||||
} else if (editInstance.test(t)) {
|
||||
return this.modifyInstance(t, source);
|
||||
}
|
||||
return [false, null];
|
||||
}
|
||||
|
||||
newConcept(t, source) {
|
||||
const match = t.match(/^conceptualise an? ~ ([a-zA-Z0-9 ]*) ~ ([A-Z0-9]+)/i);
|
||||
const match = newConcept.exec(t);
|
||||
const conceptName = match[1];
|
||||
const storedConcept = this.node.getConceptByName(conceptName);
|
||||
let concept = null;
|
||||
@ -65,16 +76,18 @@ class CEParser {
|
||||
}
|
||||
concept = new CEConcept(this.node, conceptName, source);
|
||||
|
||||
const remainder = t.replace(/^conceptualise an? ~ ([a-zA-Z0-9 ]*) ~ ([A-Z0-9]+) that/, '');
|
||||
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
|
||||
for (const fact of facts) {
|
||||
this.processConceptFact(concept, fact, source);
|
||||
const remainder = t.replace(newConcept, '');
|
||||
const facts = remainder.replace(andRegex, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
|
||||
if (facts){
|
||||
for (const fact of facts) {
|
||||
this.processConceptFact(concept, fact, source);
|
||||
}
|
||||
}
|
||||
return [true, t, concept];
|
||||
}
|
||||
|
||||
modifyConcept(t, source) {
|
||||
const conceptInfo = t.match(/^conceptualise the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) (?:has|is|~)/);
|
||||
const conceptInfo = editConcept.exec(t);
|
||||
if (!conceptInfo) {
|
||||
return [false, 'Unable to parse sentence'];
|
||||
}
|
||||
@ -87,7 +100,7 @@ class CEParser {
|
||||
|
||||
const remainderRegex = new RegExp(`^conceptualise the ${conceptName} ${conceptVar}`, 'i');
|
||||
const remainder = t.replace(remainderRegex, '');
|
||||
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
|
||||
const facts = remainder.replace(andRegex, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
|
||||
for (const fact of facts) {
|
||||
this.processConceptFact(concept, fact, source);
|
||||
}
|
||||
@ -95,27 +108,29 @@ class CEParser {
|
||||
}
|
||||
|
||||
processConceptFact(concept, fact, source) {
|
||||
const input = fact.trim().replace(/\+/g, 'and');
|
||||
if (input.match(/has the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) as ~ ([a-zA-Z0-9 ]*) ~/g)) {
|
||||
const re = /has the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) as ~ ([a-zA-Z0-9 ]*) ~/g;
|
||||
const match = re.exec(input);
|
||||
const parseVal = new RegExp(en.concept.parseValue);
|
||||
const parsePar = new RegExp(en.concept.parseParent);
|
||||
const parseRel = new RegExp(en.concept.parseRel);
|
||||
const parseSyn = new RegExp(en.concept.parseSyn);
|
||||
|
||||
const input = fact.trim().replace(/\+/g, and);
|
||||
if (parseVal.test(input)){
|
||||
const match = parseVal.exec(input);
|
||||
const valConceptName = match[1];
|
||||
const label = match[3];
|
||||
const valConcept = valConceptName === 'value' ? 0 : this.node.getConceptByName(valConceptName);
|
||||
const valConcept = valConceptName === value ? 0 : this.node.getConceptByName(valConceptName);
|
||||
concept.addValue(label, valConcept, source);
|
||||
}
|
||||
if (input.match(/^is an? ([a-zA-Z0-9 ]*)/)) {
|
||||
const re = /^is an? ([a-zA-Z0-9 ]*)/;
|
||||
const match = re.exec(input);
|
||||
if (parsePar.test(input)){
|
||||
const match = parsePar.exec(input);
|
||||
const parentConceptName = match[1];
|
||||
const parentConcept = this.node.getConceptByName(parentConceptName);
|
||||
if (parentConcept) {
|
||||
concept.addParent(parentConcept);
|
||||
}
|
||||
}
|
||||
if (input.match(/~ ([a-zA-Z0-9 ]*) ~ the ([a-zA-Z0-9 ]*) ([A-Z0-9]+)/)) {
|
||||
const re = /~ ([a-zA-Z0-9 ]*) ~ the ([a-zA-Z0-9 ]*) ([A-Z0-9]+)/;
|
||||
const match = re.exec(input);
|
||||
if (parseRel.test(input)){
|
||||
const match = parseRel.exec(input);
|
||||
const label = match[1];
|
||||
const relConceptName = match[2];
|
||||
const relConcept = this.node.getConceptByName(relConceptName);
|
||||
@ -123,22 +138,17 @@ class CEParser {
|
||||
concept.addRelationship(label, relConcept, source);
|
||||
}
|
||||
}
|
||||
if (input.match(/~ is expressed by ~ ([a-zA-Z0-9 ]*)/)) {
|
||||
const re = /~ is expressed by ~ ([a-zA-Z0-9 ]*)/;
|
||||
const match = re.exec(input);
|
||||
if (parseSyn.test(input)){
|
||||
const match = parseSyn.exec(input);
|
||||
const synonym = match[1];
|
||||
concept.addSynonym(synonym);
|
||||
}
|
||||
}
|
||||
|
||||
newInstance(t, source) {
|
||||
let names = t.match(/^there is an? ([a-zA-Z0-9 ]*) named '([^'\\]*(?:\\.[^'\\]*)*)'/i);
|
||||
if (!names) {
|
||||
names = t.match(/^there is an? ([a-zA-Z0-9 ]*) named ([a-zA-Z0-9_]*)/i);
|
||||
if (!names) { return [false, 'Unable to determine name of instance.']; }
|
||||
}
|
||||
const names = newInstance.exec(t)
|
||||
const conceptName = names[1];
|
||||
const instanceName = names[2].replace(/\\/g, '');
|
||||
const instanceName = names[2].replace(/\\/g, '').replace(/'/g, '');
|
||||
const concept = this.node.getConceptByName(conceptName);
|
||||
const currentInstance = this.node.getInstanceByName(instanceName, concept);
|
||||
if (!concept) {
|
||||
@ -150,10 +160,12 @@ class CEParser {
|
||||
const instance = new CEInstance(this.node, concept, instanceName, source);
|
||||
instance.sentences.push(t);
|
||||
|
||||
const remainder = t.replace(/^there is an? (?:[a-zA-Z0-9 ]*) named (?:[a-zA-Z0-9_]*|'[a-zA-Z0-9_ ]*') that/, '');
|
||||
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
|
||||
for (const fact of facts) {
|
||||
this.processInstanceFact(instance, fact, source);
|
||||
const remainder = t.replace(newInstance, '');
|
||||
const facts = remainder.replace(andRegex, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
|
||||
if (facts){
|
||||
for (const fact of facts) {
|
||||
this.processInstanceFact(instance, fact, source);
|
||||
}
|
||||
}
|
||||
return [true, t, instance];
|
||||
}
|
||||
@ -161,33 +173,42 @@ class CEParser {
|
||||
modifyInstance(t, source) {
|
||||
let concept;
|
||||
let instance;
|
||||
let instanceName;
|
||||
if (t.match(/^the ([a-zA-Z0-9 ]*)/i)) {
|
||||
const names = t.match(/^the ([a-zA-Z0-9 ]*)/i);
|
||||
const names = editInstance.exec(t);
|
||||
|
||||
concept = this.node.getConceptByName(names[1]);
|
||||
if (concept){
|
||||
instance = this.node.getInstanceByName(names[2].replace(/\\/g, '').replace(/'/g, ''));
|
||||
}
|
||||
else {
|
||||
const nameTokens = names[1].split(' ');
|
||||
for (const conceptCheck of this.node.concepts) {
|
||||
if (names[1].toLowerCase().indexOf(conceptCheck.name.toLowerCase()) === 0) {
|
||||
concept = conceptCheck;
|
||||
instanceName = nameTokens[concept.name.split(' ').length];
|
||||
instance = this.node.getInstanceByName(instanceName, concept);
|
||||
let currentName = '';
|
||||
for (const index in nameTokens){
|
||||
currentName += ' ' + nameTokens[index];
|
||||
concept = this.node.getConceptByName(currentName.trim());
|
||||
if (concept){
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!instance && t.match(/^the ([a-zA-Z0-9 ]*) '([^'\\]*(?:\\.[^'\\]*)*)'/i)) {
|
||||
const names = t.match(/^the ([a-zA-Z0-9 ]*) '([^'\\]*(?:\\.[^'\\]*)*)'/i);
|
||||
if (names) {
|
||||
concept = this.node.getConceptByName(names[1]);
|
||||
instanceName = names[2].replace(/\\/g, '');
|
||||
instance = this.node.getInstanceByName(instanceName, concept);
|
||||
if (concept){
|
||||
const possibleInstances = this.node.getInstances(concept.name, true);
|
||||
let lowestIndex = null;
|
||||
for (const potential of possibleInstances){
|
||||
const check = new RegExp('\\b(' + potential.name + (potential.synonyms.length ? '|' + potential.synonyms.join('|') : '') + ')\\b', 'i');
|
||||
const match = check.exec(t);
|
||||
if (match && (lowestIndex === null || match.index < lowestIndex)){
|
||||
lowestIndex = match.index;
|
||||
instance = potential;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!concept || !instance) {
|
||||
return [false, `Unknown concept/instance combination in: ${t}`];
|
||||
}
|
||||
instance.sentences.push(t);
|
||||
const tokens = t.split(' ');
|
||||
tokens.splice(0, 1 + concept.name.split(' ').length + instanceName.split(' ').length);
|
||||
tokens.splice(0, 1 + concept.name.split(' ').length + instance.name.split(' ').length);
|
||||
const remainder = tokens.join(' ');
|
||||
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
|
||||
if (facts) {
|
||||
@ -200,9 +221,14 @@ class CEParser {
|
||||
|
||||
processInstanceFact(instance, fact, source) {
|
||||
const input = fact.trim().replace(/\+/g, 'and');
|
||||
if (input.match(/^(?!has)([a-zA-Z0-9 ]*) the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_' ]*)/)) {
|
||||
const re = /^(?!has)([a-zA-Z0-9 ]*) the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_' ]*)/;
|
||||
const match = re.exec(input);
|
||||
const parseRel = new RegExp(en.instance.parseRel);
|
||||
const parseRawVal = new RegExp(en.instance.parseRawVal);
|
||||
const parseInstanceVal = new RegExp(en.instance.parseInstanceVal);
|
||||
const parseInstanceSubConcept = new RegExp(en.instance.parseInstanceSubConcept);
|
||||
const parseInstanceSynonym = new RegExp(en.instance.parseInstanceSynonym);
|
||||
|
||||
if (parseRel.test(input)){
|
||||
const match = parseRel.exec(input);
|
||||
const label = match[1];
|
||||
const relConceptName = match[2];
|
||||
const relInstanceName = match[3].replace(/'/g, '');
|
||||
@ -216,16 +242,14 @@ class CEParser {
|
||||
instance.addRelationship(label, relInstance, true, source);
|
||||
}
|
||||
}
|
||||
if (input.match(/^has ([a-zA-Z0-9]*|'[^'\\]*(?:\\.[^'\\]*)*') as ([a-zA-Z0-9 ]*)/)) {
|
||||
const re = /^has ([a-zA-Z0-9]*|'[^'\\]*(?:\\.[^'\\]*)*') as ([a-zA-Z0-9 ]*)/;
|
||||
const match = re.exec(input);
|
||||
if (parseRawVal.test(input)){
|
||||
const match = parseRawVal.exec(input);
|
||||
const value = quotes.unescape(match[1]);
|
||||
const label = match[2];
|
||||
instance.addValue(label, value, true, source);
|
||||
}
|
||||
if (input.match(/^has the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_]*|'[a-zA-Z0-9_ ]*') as ([a-zA-Z0-9 ]*)/)) {
|
||||
const re = /^has the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9]*|'[a-zA-Z0-9 ]*') as ([a-zA-Z0-9 ]*)/;
|
||||
const match = re.exec(input);
|
||||
if (parseInstanceVal.test(input)){
|
||||
const match = parseInstanceVal.exec(input);
|
||||
const valConceptName = match[1];
|
||||
const valInstanceName = match[2].replace(/'/g, '');
|
||||
const label = match[3];
|
||||
@ -238,13 +262,12 @@ class CEParser {
|
||||
instance.addValue(label, valInstance, true, source);
|
||||
}
|
||||
}
|
||||
if (input.match(/(?:is| )?an? ([a-zA-Z0-9 ]*)/g)) {
|
||||
const re = /(?:is| )?an? ([a-zA-Z0-9 ]*)/g;
|
||||
const match = re.exec(input);
|
||||
if (parseInstanceSubConcept.test(input)){
|
||||
const match = parseInstanceSubConcept.exec(input);
|
||||
instance.addSubConcept(this.node.getConceptByName(match && match[1] && match[1].trim()));
|
||||
}
|
||||
if (input.match(/is expressed by ('[a-zA-Z0-9 ]*'|[a-zA-Z0-9]*)/)) {
|
||||
const match = input.match(/is expressed by ('[a-zA-Z0-9 ]*'|[a-zA-Z0-9]*)/);
|
||||
if (parseInstanceSynonym.test(input)){
|
||||
const match = parseInstanceSynonym.exec(input);
|
||||
const synonym = match && match[1] && match[1].replace(/'/g, '').trim();
|
||||
instance.addSynonym(synonym);
|
||||
}
|
||||
|
89
src/LanguageManager.js
Normal file
89
src/LanguageManager.js
Normal file
@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright 2017 W.M. Webberley & A.D. Preece (Cardiff University)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
'use strict';
|
||||
const langs = require('../langs');
|
||||
|
||||
// Regex fragments substituted for `<name>` tokens that appear in a language
// pattern, e.g. `<conceptName>` becomes `([a-zA-Z0-9 ]*)`.
const placeholders = {
  conceptName: '([a-zA-Z0-9 ]*)',
  conceptVar: '([A-Z0-9]+)',
  instanceName: '([a-zA-Z0-9_]+|\'[a-zA-Z0-9_ ]+\')',
  relationshipLabel: '([a-zA-Z0-9 ]*)'
};

/**
 * Looks up pattern strings in the active language pack, expands their
 * placeholders and applies them to input strings.
 */
class LanguageManager {

  /**
   * @param {Object} node - the owning node; stored as `this.node`.
   */
  constructor(node) {
    this.node = node;
    this.lang = langs.en;
  }

  /**
   * Resolve a dotted / bracketed path (`'a.b'`, `'a[0].b'`) inside an object.
   *
   * @param {Object} o - object to walk.
   * @param {string} s - path to resolve.
   * @returns {*} the value found, or undefined when any segment is missing
   *   or the walk reaches a value that cannot hold properties.
   */
  static getEntry(o, s) {
    // Normalise `a[0].b` to `a.0.b` and drop a leading dot.
    const path = s.replace(/\[(\w+)\]/g, '.$1').replace(/^\./, '');
    let current = o;
    for (const key of path.split('.')) {
      const canHoldKeys = current !== null &&
        (typeof current === 'object' || typeof current === 'function');
      // Own properties only: inherited names such as `constructor` or
      // `toString` are never language entries. Guarding primitives avoids the
      // TypeError that the `in` operator throws on them.
      if (!canHoldKeys || !Object.prototype.hasOwnProperty.call(current, key)) {
        return undefined;
      }
      current = current[key];
    }
    return current;
  }

  /**
   * Fetch the pattern stored under `key` in the active language and expand
   * every `<placeholder>` token it contains.
   *
   * @param {string} key - path of the pattern in the language pack.
   * @returns {string|undefined} the expanded regex source, or undefined when
   *   the key does not resolve to a string.
   */
  getExpression(key) {
    const template = LanguageManager.getEntry(this.lang, key);
    if (typeof template !== 'string') {
      return undefined;
    }
    let pattern = template;
    for (const name of Object.keys(placeholders)) {
      // split/join substitutes every occurrence and treats the replacement
      // literally (no `$` interpretation as in String#replace).
      pattern = pattern.split('<' + name + '>').join(placeholders[name]);
    }
    return pattern;
  }

  /**
   * Test whether `string` matches the pattern stored under `key`.
   *
   * @param {string} key - path of the pattern in the language pack.
   * @param {string} string - text to test.
   * @returns {boolean} false when the key is unknown.
   */
  is(key, string) {
    const expression = this.getExpression(key);
    // `new RegExp(undefined)` matches everything, so bail out explicitly.
    if (expression === undefined) {
      return false;
    }
    return new RegExp(expression, 'i').test(string);
  }

  /**
   * Execute the pattern stored under `key` against `string`.
   *
   * @param {string} key - path of the pattern in the language pack.
   * @param {string} string - text to match.
   * @returns {Array|null} the RegExp#exec result, or null when there is no
   *   match or the key is unknown.
   */
  parse(key, string) {
    const expression = this.getExpression(key);
    if (expression === undefined) {
      return null;
    }
    return new RegExp(expression, 'i').exec(string);
  }

  /**
   * Placeholder for named extraction; not implemented yet and currently
   * returns undefined.
   */
  extract(key, string) {

  }

  /**
   * Register (or overwrite) a language pack under `key`.
   *
   * @param {string} key - language code.
   * @param {Object} language - the language pack object.
   */
  addLanguage(key, language) {
    langs[key] = language;
  }

  /**
   * Switch the active language. Unknown keys are ignored and the current
   * language stays in effect.
   *
   * @param {string} key - language code.
   */
  setLanguage(key) {
    if (key in langs) {
      this.lang = langs[key];
    }
  }
}
|
||||
module.exports = LanguageManager;
|
Loading…
Reference in New Issue
Block a user