Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding my readme_to_run #50

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added #scene_recognition_rana!
Empty file.
1 change: 1 addition & 0 deletions caffemodel/ReadMe.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
add .caffemodel file here
Empty file added caffemodel/ReadMe.txt~
Empty file.
124 changes: 124 additions & 0 deletions prediction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import argparse
import json
import time
import datetime
import numpy as np
import code
import os
import cPickle as pickle
import math
import scipy.io
import subprocess

from imagernn.solver import Solver
from imagernn.imagernn_utils import decodeGenerator, eval_split

"""
This script is used to predict sentences for arbitrary images
that are located in a folder we call root_folder. It is assumed that
the root_folder contains:
- the raw images
- a file tasks.txt that lists the images you'd like to use
- a file vgg_feats.mat that contains the CNN features.
You'll need to use the Matlab script I provided and point it at the
root folder and its tasks.txt file to save the features.

Then point this script at the folder and at a checkpoint model you'd
like to evaluate.
"""

def main(params):

# load the checkpoint
checkpoint_path = params['checkpoint_path']
print 'loading checkpoint %s' % (checkpoint_path, )
checkpoint = pickle.load(open(checkpoint_path, 'rb'))
checkpoint_params = checkpoint['params']
dataset = checkpoint_params['dataset']
model = checkpoint['model']
misc = {}
misc['wordtoix'] = checkpoint['wordtoix']
ixtoword = checkpoint['ixtoword']

# output blob which we will dump to JSON for visualizing the results
blob = {}
blob['params'] = params
blob['checkpoint_params'] = checkpoint_params
blob['imgblobs'] = []
### Function from here should repeat
dir = "/home/eic/neuraltalk/push_model/video/"
n = 0
# lets run for 120 seconds
while n < 120:
if (os.listdir(dir)):
## bash script to extract feautures from the above found image in dir
subprocess.call("bash /home/eic/neuraltalk/push_model/Intermediate_call.sh", shell=True)
# load the tasks.txt file
root_path = params['root_path']
img_names = open(os.path.join(root_path, 'tasks.txt'), 'r').read().splitlines()

# load the features for all images
features_path = os.path.join(root_path, 'vgg_feats.mat')
features_struct = scipy.io.loadmat(features_path)
features = features_struct['feats'] # this is a 4096 x N numpy array of features
D,N = features.shape

# iterate over all images and predict sentences
BatchGenerator = decodeGenerator(checkpoint_params)
for n in xrange(N):
print 'image %d/%d:' % (n, N)

# encode the image
img = {}
img['feat'] = features[:, n]
img['local_file_path'] =img_names[n]

# perform the work. heavy lifting happens inside
kwparams = { 'beam_size' : params['beam_size'] }
Ys = BatchGenerator.predict([{'image':img}], model, checkpoint_params, **kwparams)

# build up the output
img_blob = {}
img_blob['img_path'] = img['local_file_path']

# encode the top prediction
top_predictions = Ys[0] # take predictions for the first (and only) image we passed in
top_prediction = top_predictions[0] # these are sorted with highest on top
candidate = ' '.join([ixtoword[ix] for ix in top_prediction[1] if ix > 0]) # ix 0 is the END token, skip that
print 'PRED: (%f) %s' % (top_prediction[0], candidate)
img_blob['candidate'] = {'text': candidate, 'logprob': top_prediction[0]}
blob['imgblobs'].append(img_blob)

# dump result struct to file
save_file = os.path.join(root_path, 'result_struct.json')
print 'writing predictions to %s...' % (save_file, )
json.dump(blob, open(save_file, 'w'))

# dump output html
html = ''
for img in blob['imgblobs']:
html += '<img src="%s" height="400"><br>' % (img['img_path'], )
html += '(%f) %s <br><br>' % (img['candidate']['logprob'], img['candidate']['text'])
html_file = os.path.join(root_path, 'result.html')
print 'writing html result file to %s...' % (html_file, )
open(html_file, 'w').write(html)
## open the html file in default browser with text
subprocess.call("xdg-open ~/neuraltalk/push_model/Out/result.html", shell=True)
else:
n += 1
print 'waiting for image'
time.sleep(1)


if __name__ == "__main__":

parser = argparse.ArgumentParser()
parser.add_argument('checkpoint_path', type=str, help='the input checkpoint')
parser.add_argument('-r', '--root_path', default='example_images', type=str, help='folder with the images, tasks.txt file, and corresponding vgg_feats.mat file')
parser.add_argument('-b', '--beam_size', type=int, default=1, help='beam size in inference. 1 indicates greedy per-word max procedure. Good value is approx 20 or so, and more = better.')

args = parser.parse_args()
params = vars(args) # convert to ordinary dict
print 'parsed parameters:'
print json.dumps(params, indent = 2)
main(params)
2 changes: 2 additions & 0 deletions push_model/Intermediate_call.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Extract VGG-16 CNN features (GPU) for every image listed in ./video/tasks.txt
# and write the resulting vgg_feats.mat into ./Out for prediction.py to read.
python ~/neuraltalk/py_caffe_feat_extract.py --model_path ~/neuraltalk/caffemodel/VGG_ILSVRC_16_layers.caffemodel --model_def_path ~/neuraltalk/python_features/deploy_features.prototxt -i ./video --filter ./video/tasks.txt --WITH_GPU -o ./Out
# Empty the drop folder so the watcher in prediction.py does not reprocess the same frame.
rm -r ./video/*
2 changes: 2 additions & 0 deletions push_model/Intermediate_call.sh~
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# NOTE(review): stale editor backup of Intermediate_call.sh (identical content);
# consider removing it from version control.
python ~/neuraltalk/py_caffe_feat_extract.py --model_path ~/neuraltalk/caffemodel/VGG_ILSVRC_16_layers.caffemodel --model_def_path ~/neuraltalk/python_features/deploy_features.prototxt -i ./video --filter ./video/tasks.txt --WITH_GPU -o ./Out
rm -r ./video/*
Binary file added push_model/Out/out.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions push_model/Out/result.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<img src="out.jpg" height="400"><br>(-12.586372) a man is standing in front of a large screen <br><br><img src="out.jpg" height="400"><br>(-8.179808) a bathroom with a toilet and a sink <br><br><img src="out.jpg" height="400"><br>(-8.179808) a bathroom with a toilet and a sink <br><br><img src="out.jpg" height="400"><br>(-8.179808) a bathroom with a toilet and a sink <br><br><img src="out.jpg" height="400"><br>(-8.179808) a bathroom with a toilet and a sink <br><br>
1 change: 1 addition & 0 deletions push_model/Out/result_struct.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"checkpoint_params": {"grad_clip": 10.0, "regc": 2.66e-07, "init_model_from": "", "dataset": "coco", "image_encoding_size": 600, "drop_prob_decoder": 0.5, "word_encoding_size": 600, "max_epochs": 50, "eval_batch_size": 100, "fappend": "lstm", "generator": "lstm", "min_ppl_or_abort": 50.0, "tanhC_version": 1, "eval_max_images": -1, "decay_rate": 0.999, "rnn_feed_once": 0, "hidden_size": 600, "momentum": 0.0, "worker_status_output_directory": "/scail/u/karpathy/rnn-image-describer/status", "rnn_relu_encoders": 0, "learning_rate": 0.000404, "checkpoint_output_directory": "/scail/u/karpathy/rnn-image-describer/cv", "do_grad_check": 0, "word_count_threshold": 5, "batch_size": 64, "write_checkpoint_ppl_threshold": 15.0, "smooth_eps": 1e-08, "solver": "rmsprop", "eval_period": 0.2, "drop_prob_encoder": 0.5}, "imgblobs": [{"img_path": "out.jpg", "candidate": {"text": "a man is standing in front of a large screen", "logprob": -12.586372029703476}}, {"img_path": "out.jpg", "candidate": {"text": "a bathroom with a toilet and a sink", "logprob": -8.1798075037485667}}, {"img_path": "out.jpg", "candidate": {"text": "a bathroom with a toilet and a sink", "logprob": -8.1798075037485667}}, {"img_path": "out.jpg", "candidate": {"text": "a bathroom with a toilet and a sink", "logprob": -8.1798075037485667}}, {"img_path": "out.jpg", "candidate": {"text": "a bathroom with a toilet and a sink", "logprob": -8.1798075037485667}}], "params": {"beam_size": 1, "checkpoint_path": "/home/eic/neuraltalk/data/m.p", "root_path": "./Out"}}
1 change: 1 addition & 0 deletions push_model/Out/tasks.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
out.jpg
Binary file added push_model/Out/vgg_feats.mat
Binary file not shown.
1 change: 1 addition & 0 deletions push_model/get_set.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Start the watcher/captioner with checkpoint m.p, using ./Out as the root folder.
python ~/neuraltalk/prediction.py ~/neuraltalk/data/m.p -r ./Out
1 change: 1 addition & 0 deletions push_model/get_set.sh~
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# NOTE(review): stale editor backup of get_set.sh. The "-n 4" flag is not
# declared in prediction.py's argparse setup (only -r and -b), so this
# invocation would fail; consider removing the file from version control.
python ~/neuraltalk/prediction.py ~/neuraltalk/data/m.p -r ./ -n 4
8 changes: 8 additions & 0 deletions push_model/go.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Clear results of the previous run.
rm -r ./Out/*
# Capture one 640x480 frame from the webcam (1s seek) into the watch folder.
ffmpeg -f video4linux2 -s 640x480 -i /dev/video0 -ss 0:0:1 -frames 1 ~/neuraltalk/push_model/video/out.jpg
# Stage the task list for both the feature extractor (./video) and prediction.py (./Out).
cp tasks.txt ./video/tasks.txt
cp tasks.txt ./Out/tasks.txt
# Keep a copy of the frame next to the results so result.html can display it.
cp ./video/out.jpg ./Out/out.jpg
# Give the watcher time to pick up the frame before clearing the drop folder.
sleep 5
rm -r ./video/*

7 changes: 7 additions & 0 deletions push_model/go.sh~
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# NOTE(review): stale editor backup of go.sh (same content minus the initial
# "rm -r ./Out/*"); consider removing it from version control.
ffmpeg -f video4linux2 -s 640x480 -i /dev/video0 -ss 0:0:1 -frames 1 ~/neuraltalk/push_model/video/out.jpg
cp tasks.txt ./video/tasks.txt
cp tasks.txt ./Out/tasks.txt
cp ./video/out.jpg ./Out/out.jpg
sleep 5
rm -r ./video/*

Empty file added push_model/open_result.sh
Empty file.
5 changes: 5 additions & 0 deletions push_model/push.sh~
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Stale editor backup of the capture script (go.sh is the current version).
# Grab one 640x480 webcam frame (2s seek) and drop it into the watch folder.
ffmpeg -f video4linux2 -s 640x480 -i /dev/video0 -ss 0:0:2 -frames 1 ~/neuraltalk/push_model/video/out.jpg
# Use tasks.txt (plural) so py_caffe_feat_extract.py's --filter ./video/tasks.txt
# finds its file, consistent with the other scripts.
cp tasks.txt ./video/tasks.txt
# "sleep 10[s]" is not a valid time interval; seconds is already the default unit.
sleep 10
# Delete only the folder's contents, not the folder itself -- prediction.py
# polls os.listdir() on this directory and would fail if it disappeared.
rm -r ./video/*

1 change: 1 addition & 0 deletions push_model/tasks.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
out.jpg
Empty file added push_model/tasks.txt~
Empty file.
5 changes: 5 additions & 0 deletions readme_to_run.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
You have to download 2 files:
1. In the caffemodel folder, add the VGG_ILSVRC_16_layers.caffemodel file:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
2. In the data folder, add the m.p file (the filename should be changed to this):
https://github.com/BVLC/caffe/wiki/Model-Zoo