Skip to content

Commit

Permalink
Merge pull request #359 from crawlab-team/develop
Browse files Browse the repository at this point in the history
v0.4.1
  • Loading branch information
tikazyq authored Dec 13, 2019
2 parents 4c4c62f + ce7d523 commit a6f7434
Show file tree
Hide file tree
Showing 43 changed files with 791 additions and 544 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
# 0.4.1 (2019-12-13)
### Features / Enhancement
- **Spiderfile Optimization**. Stages changed from dictionary to array. [#358](https://github.com/crawlab-team/crawlab/issues/358)
- **Baidu Tongji Update**.

### Bug Fixes
- **Unable to display schedule tasks**. [#353](https://github.com/crawlab-team/crawlab/issues/353)
- **Duplicate node registration**. [#334](https://github.com/crawlab-team/crawlab/issues/334)

# 0.4.0 (2019-12-06)
### Features / Enhancement
- **Configurable Spider**. Allow users to add spiders using *Spiderfile* to configure crawling rules.
- **Execution Mode**. Allow users to choose one of 3 task execution modes: *All Nodes*, *Selected Nodes* and *Random*.

### Bug Fixes
- **Task accidentally killed**. [#306](https://github.com/crawlab-team/crawlab/issues/306)
- **Documentation fix**. [#258](https://github.com/crawlab-team/crawlab/issues/258) [#301](https://github.com/crawlab-team/crawlab/issues/301)
- **Direct deploy incompatible with Windows**. [#288](https://github.com/crawlab-team/crawlab/issues/288)
- **Log files lost**. [#269](https://github.com/crawlab-team/crawlab/issues/269)

# 0.3.5 (2019-10-28)
### Features / Enhancement
- **Graceful Shutdown**. [detail](https://github.com/crawlab-team/crawlab/commit/63fab3917b5a29fd9770f9f51f1572b9f0420385)
Expand Down
10 changes: 10 additions & 0 deletions backend/constants/schedule.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package constants

// Values stored in a schedule's Status field.
const (
	// ScheduleStatusStop is the "stop" status value.
	ScheduleStatusStop = "stop"
	// ScheduleStatusRunning is the "running" status value.
	ScheduleStatusRunning = "running"
	// ScheduleStatusError is the "error" status value.
	ScheduleStatusError = "error"
)

// Human-readable messages that accompany ScheduleStatusError.
const (
	// ScheduleStatusErrorNotFoundNode is the message for a missing node.
	ScheduleStatusErrorNotFoundNode = "Not Found Node"
	// ScheduleStatusErrorNotFoundSpider is the message for a missing spider.
	ScheduleStatusErrorNotFoundSpider = "Not Found Spider"
)
67 changes: 67 additions & 0 deletions backend/database/redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ import (
"context"
"crawlab/entity"
"crawlab/utils"
"errors"
"github.com/apex/log"
"github.com/gomodule/redigo/redis"
"github.com/spf13/viper"
"runtime/debug"
"strings"
"time"
)

Expand All @@ -17,9 +19,18 @@ type Redis struct {
pool *redis.Pool
}

// Mutex groups the settings of a named lock.
//
// NOTE(review): nothing in the visible code constructs, reads or writes this
// type (Lock/UnLock work directly on key strings), so the per-field notes
// below are inferred from the field names only — confirm before relying on
// them.
type Mutex struct {
// Name identifies the lock; it is the only exported field.
Name string
// presumably how long the lock is held before it expires — TODO confirm
expiry time.Duration
// presumably the number of acquisition attempts — TODO confirm
tries int
// presumably the wait between acquisition attempts — TODO confirm
delay time.Duration
// presumably the token stored under the lock key — TODO confirm
value string
}

// NewRedisClient returns a Redis wrapper whose connection pool comes from
// NewRedisPool.
func NewRedisClient() *Redis {
	client := new(Redis)
	client.pool = NewRedisPool()
	return client
}

func (r *Redis) RPush(collection string, value interface{}) error {
c := r.pool.Get()
defer utils.Close(c)
Expand Down Expand Up @@ -143,3 +154,59 @@ func Sub(channel string, consume ConsumeFunc) error {
}
return nil
}

// getLockKey builds the Redis key used for a lock: every ":" in lockKey is
// replaced with "-" and the result is prefixed with "nodes:lock:".
func (r *Redis) getLockKey(lockKey string) string {
	const prefix = "nodes:lock:"
	sanitized := strings.Replace(lockKey, ":", "-", -1)
	return prefix + sanitized
}

// Lock tries to acquire the lock named lockKey.
//
// The lock is a Redis key (see getLockKey) written with SET ... NX PX 30000,
// so it is only created when absent and expires after 30000 milliseconds.
// The stored value is the current Unix time in seconds; that same value is
// returned on success and is what UnLock expects back.
//
// It returns (0, err) when the Redis command fails and
// (0, "the lockKey is locked") when the key already exists.
func (r *Redis) Lock(lockKey string) (int64, error) {
	conn := r.pool.Get()
	defer utils.Close(conn)

	key := r.getLockKey(lockKey)
	now := time.Now().Unix()

	reply, err := conn.Do("SET", key, now, "NX", "PX", 30000)
	switch {
	case err != nil:
		log.Errorf("get lock fail with error: %s", err.Error())
		debug.PrintStack()
		return 0, err
	case reply == nil:
		// A nil reply without an error means NX refused the write.
		log.Errorf("the lockKey is locked: key=%s", key)
		return 0, errors.New("the lockKey is locked")
	}
	return now, nil
}

// UnLock releases the lock named lockKey, but only if it still holds value
// (the number returned by the matching Lock call).
//
// The compare-and-delete runs as one Lua script via EVAL. Doing GET and DEL
// as separate commands leaves a window in which the lock can expire and be
// taken by another holder, whose lock this call would then delete.
//
// Nothing is returned; every failure is logged:
//   - the key no longer exists (expired or already released),
//   - the key holds a different value (someone else owns the lock),
//   - the Redis call itself failed.
func (r *Redis) UnLock(lockKey string, value int64) {
	// Reply is {status, current}:
	//   status -1: key missing; status -2: value mismatch (current = stored
	//   value, or 0 if it is not numeric); otherwise status is DEL's result.
	const unlockScript = `
local current = redis.call("GET", KEYS[1])
if not current then
	return {-1, 0}
end
if current ~= ARGV[1] then
	return {-2, tonumber(current) or 0}
end
return {redis.call("DEL", KEYS[1]), 0}
`

	c := r.pool.Get()
	defer utils.Close(c)
	lockKey = r.getLockKey(lockKey)

	reply, err := redis.Values(c.Do("EVAL", unlockScript, 1, lockKey, value))
	if err != nil {
		log.Errorf("unlock failed, error: %s", err.Error())
		debug.PrintStack()
		return
	}

	var status, current int64
	if _, err := redis.Scan(reply, &status, &current); err != nil {
		log.Errorf("unlock failed, error: %s", err.Error())
		debug.PrintStack()
		return
	}

	switch status {
	case -1:
		// Same message the separate GET produced for a missing key.
		log.Errorf("get lockKey error: %s", redis.ErrNil.Error())
		debug.PrintStack()
	case -2:
		log.Errorf("the lockKey value diff: %d, %d", value, current)
	case 0:
		log.Errorf("unlock failed: key=%s", lockKey)
	}
}
2 changes: 1 addition & 1 deletion backend/entity/config_spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ type ConfigSpiderData struct {
Engine string `yaml:"engine" json:"engine"`
StartUrl string `yaml:"start_url" json:"start_url"`
StartStage string `yaml:"start_stage" json:"start_stage"`
Stages map[string]Stage `yaml:"stages" json:"stages"`
Stages []Stage `yaml:"stages" json:"stages"`
Settings map[string]string `yaml:"settings" json:"settings"`
}

Expand Down
13 changes: 8 additions & 5 deletions backend/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,17 +154,20 @@ func main() {
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情
authGroup.PUT("/tasks", routes.PutTask) // 派发任务
authGroup.DELETE("/tasks/:id", routes.DeleteTask) // 删除任务
authGroup.DELETE("/tasks_multiple", routes.DeleteMultipleTask) // 删除多个任务
authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) //删除指定状态的任务
authGroup.POST("/tasks/:id/cancel", routes.CancelTask) // 取消任务
authGroup.GET("/tasks/:id/log", routes.GetTaskLog) // 任务日志
authGroup.GET("/tasks/:id/results", routes.GetTaskResults) // 任务结果
authGroup.GET("/tasks/:id/results/download", routes.DownloadTaskResultsCsv) // 下载任务结果
// 定时任务
authGroup.GET("/schedules", routes.GetScheduleList) // 定时任务列表
authGroup.GET("/schedules/:id", routes.GetSchedule) // 定时任务详情
authGroup.PUT("/schedules", routes.PutSchedule) // 创建定时任务
authGroup.POST("/schedules/:id", routes.PostSchedule) // 修改定时任务
authGroup.DELETE("/schedules/:id", routes.DeleteSchedule) // 删除定时任务
authGroup.GET("/schedules", routes.GetScheduleList) // 定时任务列表
authGroup.GET("/schedules/:id", routes.GetSchedule) // 定时任务详情
authGroup.PUT("/schedules", routes.PutSchedule) // 创建定时任务
authGroup.POST("/schedules/:id", routes.PostSchedule) // 修改定时任务
authGroup.DELETE("/schedules/:id", routes.DeleteSchedule) // 删除定时任务
authGroup.POST("/schedules/:id/stop", routes.StopSchedule) // 停止定时任务
authGroup.POST("/schedules/:id/run", routes.RunSchedule) // 运行定时任务
// 统计数据
authGroup.GET("/stats/home", routes.GetHomeStats) // 首页统计数据
// 用户
Expand Down
10 changes: 3 additions & 7 deletions backend/model/config_spider/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,12 @@ func GetAllFields(data entity.ConfigSpiderData) []entity.Field {
func GetStartStageName(data entity.ConfigSpiderData) string {
// 如果 start_stage 设置了且在 stages 里,则返回
if data.StartStage != "" {
for stageName := range data.Stages {
if stageName == data.StartStage {
return data.StartStage
}
}
return data.StartStage
}

// 否则返回第一个 stage
for stageName := range data.Stages {
return stageName
for _, stage := range data.Stages {
return stage.Name
}
return ""
}
3 changes: 2 additions & 1 deletion backend/model/config_spider/scrapy.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ func (g ScrapyGenerator) ProcessSpider() error {

// 替换 parsers
strParser := ""
for stageName, stage := range g.ConfigData.Stages {
for _, stage := range g.ConfigData.Stages {
stageName := stage.Name
stageStr := g.GetParserString(stageName, stage)
strParser += stageStr
}
Expand Down
2 changes: 1 addition & 1 deletion backend/model/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ func GetNode(id bson.ObjectId) (Node, error) {
defer s.Close()

if err := c.FindId(id).One(&node); err != nil {
log.Errorf(err.Error())
log.Errorf("get node error: %s, id: %s", err.Error(), id.Hex())
debug.PrintStack()
return node, err
}
Expand Down
121 changes: 65 additions & 56 deletions backend/model/schedule.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,25 @@ import (
)

type Schedule struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
Name string `json:"name" bson:"name"`
Description string `json:"description" bson:"description"`
SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"`
NodeId bson.ObjectId `json:"node_id" bson:"node_id"`
NodeKey string `json:"node_key" bson:"node_key"`
Cron string `json:"cron" bson:"cron"`
EntryId cron.EntryID `json:"entry_id" bson:"entry_id"`
Param string `json:"param" bson:"param"`
Id bson.ObjectId `json:"_id" bson:"_id"`
Name string `json:"name" bson:"name"`
Description string `json:"description" bson:"description"`
SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"`
//NodeId bson.ObjectId `json:"node_id" bson:"node_id"`
//NodeKey string `json:"node_key" bson:"node_key"`
Cron string `json:"cron" bson:"cron"`
EntryId cron.EntryID `json:"entry_id" bson:"entry_id"`
Param string `json:"param" bson:"param"`
RunType string `json:"run_type" bson:"run_type"`
NodeIds []bson.ObjectId `json:"node_ids" bson:"node_ids"`

// 状态
Status string `json:"status" bson:"status"`

// 前端展示
SpiderName string `json:"spider_name" bson:"spider_name"`
NodeName string `json:"node_name" bson:"node_name"`
Message string `json:"message" bson:"message"`

CreateTs time.Time `json:"create_ts" bson:"create_ts"`
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
Expand All @@ -46,26 +52,26 @@ func (sch *Schedule) Delete() error {
return c.RemoveId(sch.Id)
}

func (sch *Schedule) SyncNodeIdAndSpiderId(node Node, spider Spider) {
sch.syncNodeId(node)
sch.syncSpiderId(spider)
}

func (sch *Schedule) syncNodeId(node Node) {
if node.Id.Hex() == sch.NodeId.Hex() {
return
}
sch.NodeId = node.Id
_ = sch.Save()
}

func (sch *Schedule) syncSpiderId(spider Spider) {
if spider.Id.Hex() == sch.SpiderId.Hex() {
return
}
sch.SpiderId = spider.Id
_ = sch.Save()
}
//func (sch *Schedule) SyncNodeIdAndSpiderId(node Node, spider Spider) {
// sch.syncNodeId(node)
// sch.syncSpiderId(spider)
//}

//func (sch *Schedule) syncNodeId(node Node) {
// if node.Id.Hex() == sch.NodeId.Hex() {
// return
// }
// sch.NodeId = node.Id
// _ = sch.Save()
//}

//func (sch *Schedule) syncSpiderId(spider Spider) {
// if spider.Id.Hex() == sch.SpiderId.Hex() {
// return
// }
// sch.SpiderId = spider.Id
// _ = sch.Save()
//}

func GetScheduleList(filter interface{}) ([]Schedule, error) {
s, c := database.GetCol("schedules")
Expand All @@ -78,29 +84,31 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) {

var schs []Schedule
for _, schedule := range schedules {
// 获取节点名称
if schedule.NodeId == bson.ObjectIdHex(constants.ObjectIdNull) {
// 选择所有节点
schedule.NodeName = "All Nodes"
} else {
// 选择单一节点
node, err := GetNode(schedule.NodeId)
if err != nil {
log.Errorf(err.Error())
continue
}
schedule.NodeName = node.Name
}
// TODO: 获取节点名称
//if schedule.NodeId == bson.ObjectIdHex(constants.ObjectIdNull) {
// // 选择所有节点
// schedule.NodeName = "All Nodes"
//} else {
// // 选择单一节点
// node, err := GetNode(schedule.NodeId)
// if err != nil {
// schedule.Status = constants.ScheduleStatusError
// schedule.Message = constants.ScheduleStatusErrorNotFoundNode
// } else {
// schedule.NodeName = node.Name
// }
//}

// 获取爬虫名称
spider, err := GetSpider(schedule.SpiderId)
if err != nil && err == mgo.ErrNotFound {
log.Errorf("get spider by id: %s, error: %s", schedule.SpiderId.Hex(), err.Error())
debug.PrintStack()
_ = schedule.Delete()
continue
schedule.Status = constants.ScheduleStatusError
schedule.Message = constants.ScheduleStatusErrorNotFoundSpider
} else {
schedule.SpiderName = spider.Name
}
schedule.SpiderName = spider.Name

schs = append(schs, schedule)
}
return schs, nil
Expand All @@ -125,12 +133,13 @@ func UpdateSchedule(id bson.ObjectId, item Schedule) error {
if err := c.FindId(id).One(&result); err != nil {
return err
}
node, err := GetNode(item.NodeId)
if err != nil {
return err
}
//node, err := GetNode(item.NodeId)
//if err != nil {
// return err
//}

item.NodeKey = node.Key
item.UpdateTs = time.Now()
//item.NodeKey = node.Key
if err := item.Save(); err != nil {
return err
}
Expand All @@ -141,15 +150,15 @@ func AddSchedule(item Schedule) error {
s, c := database.GetCol("schedules")
defer s.Close()

node, err := GetNode(item.NodeId)
if err != nil {
return err
}
//node, err := GetNode(item.NodeId)
//if err != nil {
// return err
//}

item.Id = bson.NewObjectId()
item.CreateTs = time.Now()
item.UpdateTs = time.Now()
item.NodeKey = node.Key
//item.NodeKey = node.Key

if err := c.Insert(&item); err != nil {
debug.PrintStack()
Expand Down
6 changes: 0 additions & 6 deletions backend/model/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,11 +319,5 @@ func GetConfigSpiderData(spider Spider) (entity.ConfigSpiderData, error) {
return configData, err
}

// 赋值 stage_name
for stageName, stage := range configData.Stages {
stage.Name = stageName
configData.Stages[stageName] = stage
}

return configData, nil
}
Loading

0 comments on commit a6f7434

Please sign in to comment.