Skip to content

Commit

Permalink
Merge pull request #34 from a-mma/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
freakeinstein authored Sep 2, 2019
2 parents ad33299 + b182d23 commit 3cb9697
Show file tree
Hide file tree
Showing 16 changed files with 118 additions and 191 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ node_js:
- "11"

before_install:
- docker build -f Dockerfile_travis -t ammaorg/aquiladb:travis .
- docker build -f Dockerfile_local_build -t ammaorg/aquiladb:travis .
- docker run -d -i -p 50051:50051 -t ammaorg/aquiladb:travis
- docker ps -a
- sudo apt-get install -y make
Expand Down
47 changes: 47 additions & 0 deletions Dockerfile_bleeding_edge
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Bleeding-edge image: clones the AquilaDB `develop` branch from GitHub
# instead of copying local sources.
FROM ubuntu:18.04
MAINTAINER a_mma

# install couchdb (document store used as AquilaDB's metadata backend)
RUN apt-get update; apt-get install -y curl; apt-get install -y git
RUN echo "deb https://apache.bintray.com/couchdb-deb bionic main" \
| tee -a /etc/apt/sources.list
RUN apt-get install -y gnupg
RUN curl -L https://couchdb.apache.org/repo/bintray-pubkey.asc \
| apt-key add -
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y couchdb

# clone AquilaDB src (develop branch — this is what makes the image "bleeding edge")
RUN mkdir AquilaDB && cd AquilaDB && git clone https://github.com/a-mma/AquilaDB.git . && git checkout develop

# setup node environment
RUN curl -sL https://deb.nodesource.com/setup_10.x | bash -
RUN apt install -y nodejs
RUN apt-get install -y make
# drop any stale lockfile before installing; `|| true` keeps the step from
# failing when the lockfile is absent (shell && / || are left-associative,
# so npm install runs in either case)
RUN cd AquilaDB/src && rm package-lock.json || true && npm install

# setup python environment
RUN python3 --version
RUN apt-get -y install python3-pip
RUN apt-get install -y wget
# install miniconda into /opt/conda and make it the default shell environment
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
/bin/bash ~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
echo "conda activate base" >> ~/.bashrc
ENV PATH /opt/conda/bin:$PATH
# NOTE(review): `myenv` is created but never activated — `conda install` and the
# pip install below target the *base* env; confirm whether myenv is still needed
RUN conda create -n myenv python && conda install faiss-cpu=1.5.1 -c pytorch -y
RUN python -m pip install grpcio-tools annoy pyyaml

# install pm2 to run services
RUN npm install pm2 -g

# make init script executable
RUN chmod +x /AquilaDB/src/init_aquila_db.sh

# create DB data directory (symlinks expose CouchDB and vector data under /data)
RUN mkdir /data && mkdir /AquilaDB/src/data && \
ln -s /opt/couchdb/data /data/DDB && \
ln -s /AquilaDB/src/data /data/VDB

CMD /AquilaDB/src/init_aquila_db.sh && tail -f /dev/null
5 changes: 1 addition & 4 deletions Dockerfile_travis → Dockerfile_local_build
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ RUN curl -L https://couchdb.apache.org/repo/bintray-pubkey.asc \
| apt-key add -
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y couchdb

# clone AquilaDB src
# RUN mkdir AquilaDB && cd AquilaDB && git clone https://github.com/a-mma/AquilaDB.git .

# setup node environment
RUN curl -sL https://deb.nodesource.com/setup_10.x | bash -
RUN apt install -y nodejs
Expand All @@ -34,7 +31,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
echo "conda activate base" >> ~/.bashrc
ENV PATH /opt/conda/bin:$PATH
RUN conda create -n myenv python && conda install faiss-cpu=1.5.1 -c pytorch -y
RUN python -m pip install grpcio-tools annoy
RUN python -m pip install grpcio-tools annoy pyyaml

# install pm2 to run services
RUN npm install pm2 -g
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile → Dockerfile_stable_version
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
echo "conda activate base" >> ~/.bashrc
ENV PATH /opt/conda/bin:$PATH
RUN conda create -n myenv python && conda install faiss-cpu=1.5.1 -c pytorch -y
RUN python -m pip install grpcio-tools annoy
RUN python -m pip install grpcio-tools annoy pyyaml

# install pm2 to run services
RUN npm install pm2 -g
Expand Down
11 changes: 7 additions & 4 deletions src/DB_config.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
docs:
vecount: 100 # minimum data required to start indexing
faiss:
init:
vecount: 100 # minimum data required to start indexing
nlist: 1 # number of cells
nprobe: 1 # number of cells that are visited to perform a search
bpv: 8 # bytes per vector
bpsv: 8 # bytes per sub vector
vd: 784 # fixed vector dimension
indexdump:
root: '/AquilaDB/src/data' # file location for index dump
name: rawdump # file name for data dump
annoy:
init:
vd: 784 # fixed vector dimension
smetric: 'angular' # similarity metric to be used
ntrees: 10 # no. of trees
couchDB:
DBInstance: default # database namespace
host: http://localhost:5984
Expand Down
3 changes: 1 addition & 2 deletions src/core/document/index.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
const atob = require('atob')
const crypto = require('crypto')
const utils = require('../../utils')

Expand Down Expand Up @@ -29,7 +28,7 @@ module.exports = {
var payload = {}
if (document.b64data) {
try {
payload = JSON.parse(atob(document.b64data))
payload = JSON.parse(document.b64data.toString("utf-8"))
}
catch (err) {
console.log(err)
Expand Down
4 changes: 1 addition & 3 deletions src/core/faissclient/index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
const grpc = require('grpc')
const protoLoader = require('@grpc/proto-loader')
const btoa = require('btoa')
const atob = require('atob')
const njs = require('numjs')
const crypto = require('crypto')
const utils = require('../../utils')
Expand Down Expand Up @@ -367,7 +365,7 @@ module.exports = {
resp_ = {
status: true,
dist_matrix: dist_matrix_str,
documents: btoa(JSON.stringify(resp.rows))
documents: Buffer.from(JSON.stringify(resp.rows))
}
cbk(err, resp_)
}
Expand Down
1 change: 0 additions & 1 deletion src/core/search/index.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
const btoa = require('btoa')
const faiss_client = require('../faissclient')

module.exports = {
Expand Down
75 changes: 50 additions & 25 deletions src/hannoy/index.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,44 @@
import numpy as np
from annoy import AnnoyIndex
import yaml
import os

model_location = '/data/VDB/model_ha'

class Annoy:
def __init__(self):
    """Initialize Annoy wrapper: load defaults, then override them from
    DB_config.yml / environment variables and try to restore a persisted
    index from disk.

    Any failure (missing config file, bad YAML, missing dump) is caught and
    logged; the instance then keeps the defaults with modelLoaded = False.
    """
    # safe defaults, used when config loading fails
    self.dim = 300
    self.sim_metric = 'angular'
    self.n_trees = 10
    self.search_k = 1
    self.modelLoaded = False
    self.total = 0
    # this is to keep track of all vectors inserted
    # for saving into disk and retrieve later
    self.index_disk = None
    try:
        with open('DB_config.yml', 'r') as stream:
            DB_config = yaml.safe_load(stream)
        # os.getenv returns a *string* when the variable is set, so numeric
        # settings must be cast to int before AnnoyIndex(dim) / build(n_trees)
        self.dim = int(os.getenv('FIXED_VEC_DIMENSION', DB_config['annoy']['init']['vd']))
        self.sim_metric = os.getenv('ANNOY_SIM_METRIC', DB_config['annoy']['init']['smetric'])
        self.n_trees = int(os.getenv('ANNOY_NTREES', DB_config['annoy']['init']['ntrees']))
        # restore a previously persisted index, if one exists on disk
        self.modelLoaded = self.loadModelFromDisk()
    except Exception as e:
        print('Error initializing Annoy: ', e)

def initAnnoy(self):
    """Create (if needed) and build the Annoy index, persisting it to disk.

    Returns True when the index was built and saved, False otherwise.
    """
    # a fresh index is only needed when nothing was restored from disk
    if not self.modelLoaded:
        print('Annoy init index')
        self.a_index = AnnoyIndex(self.dim, self.sim_metric)

    # (re)build the forest of trees
    built = self.a_index.build(self.n_trees)

    if built:
        self.modelLoaded = self.saveModelToDisk()
    return self.modelLoaded

def addVectors(self, documents):
ids = []
# unbuild annoy index before adding new data
# unbuild index first
self.a_index.unbuild()
self.total = self.total + len(documents)
ids = []
# add vectors
for document in documents:
_id = document._id
Expand All @@ -43,12 +55,19 @@ def addVectors(self, documents):

# add vector
self.a_index.add_item(int(_id), vector_e)
# keep a copy for disk storage
list_ = vector_e
list_.append(int(_id))
if self.index_disk is None:
self.index_disk = np.array([list_], dtype=float)
else:
self.index_disk = np.append(self.index_disk, [list_], axis=0)

# build vector
build_ = self.a_index.build(self.n_trees)
# if build_:
# self.modelLoaded = self.saveModelToDisk(model_location, self.a_index)
return build_, ids
if build_:
self.modelLoaded = self.saveModelToDisk()
return self.modelLoaded, ids

def deleteVectors(self, ids):

Expand All @@ -59,27 +78,33 @@ def getNearest(self, matrix, k):
dists = []

for vec_data in matrix:
_id, _dist = self.a_index.get_nns_by_vector(vec_data, k, search_k=self.search_k, include_distances=True)
_id, _dist = self.a_index.get_nns_by_vector(vec_data, k, include_distances=True)
ids.append(_id)
dists.append(_dist)

return True, ids, dists

def loadModelFromDisk(self):
    """Restore the Annoy index from the numpy dump at `model_location`.

    Each stored row is [vector components..., id]. Returns True on a
    successful rebuild, False on any failure.
    """
    try:
        # prepare new (empty) index
        self.a_index = AnnoyIndex(self.dim, self.sim_metric)
        # read the persisted vector dump
        self.index_disk = np.load(model_location + '.npy')
        # re-insert every stored vector under its original id
        # (last element of each row is the id)
        for vec_ in self.index_disk.tolist():
            self.a_index.add_item(int(vec_[-1]), vec_[0:-1])
        # build index so it becomes searchable again
        build_ = self.a_index.build(self.n_trees)
        print('Annoy index loading success')
        return True
    except Exception as e:
        # report the reason, consistent with __init__'s error logging
        # (previously `e` was bound but never printed)
        print('Annoy index loading failed: ', e)
        return False

def saveModelToDisk(self, location, index):
def saveModelToDisk(self):
try:
# write index
index.save(location)
np.save(model_location, self.index_disk)
print('Annoy index writing success')
return True
except:
Expand Down
Loading

0 comments on commit 3cb9697

Please sign in to comment.