rillomas tech log: 2010

2010年11月15日月曜日

GDD 2010 dev quizのしりとりを解く（はずの）Goコード

ちょっと前GDD 2010のdev quiz用に書いたGoのコードを公開してみる。当日は出張で出席できないことがわかってたので、実際に申し込んではいないからあってるかどうかは定かでない。とりあえずLevel 2まではそれっぽい答が出てるのは確認した。Level 3は時間がかかりすぎるパスを先に切らないと常識的な時間で終わらなかったはず。

ちなみに、元々はPythonで書いたのをGoに書き直したコードだったりする。実行時間的には4分かかってた処理が20秒くらいに縮まった。

コード


package main
import (
    "flag"
    "os"
    "log"
    "bufio"
    "fmt"
    "strings"
    "container/vector"
)

var logTag string = ""
var logger *log.Logger = log.New(os.Stdout, logTag, 0)

func ExtractFirstAndLastLetter(word string) string {
    first := word[0]
    last := word[len(word)-1]
    return fmt.Sprintf("%c%c", first, last)
}

var CHAR_LIST string = "abcdefghijklmnopqrstuvwxyz"

type StringList struct {
    list vector.StringVector
}

func (l *StringList) add(s string) {
    l.list.Push(s)
}

func (l *StringList) copy() StringList {
    copied := l.list.Copy()
    return StringList { copied }
}

func (l *StringList) delete(index int) {
    l.list.Delete(index)
}

func (l *StringList) size() int {
    return l.list.Len()
}


func (l *StringList) join(sep string) string {
    return strings.Join(l.list, sep)
}

type WordDictionary map[byte] StringList

func (d WordDictionary) copy() WordDictionary {
    newDict := make(WordDictionary)
    for key,val := range d {
        newDict[key] = val.copy()
    }
    return newDict
}

const (
    WIN = 0
    LOSE = 1
)

func DoShiritoriToEnd(dict WordDictionary, word string, wordPath StringList, depth int) {
    depth += 1
    endIndex := len(word)-1
    lastChar := word[endIndex]
    wordList := dict[lastChar]
    if wordList.size() == 0 {
        joined := wordPath.join("-")
        winLose := depth % 2
        if winLose == WIN {
            logger.Print(" WIN:"+joined)
        } else {
            logger.Print("LOSE:"+joined)
        }
    }

    for i, nextWord := range wordList.list {
        copiedDict := dict.copy()
        copiedList := copiedDict[lastChar]
        copiedPath := wordPath.copy()
        copiedPath.add(nextWord)
        copiedList.delete(i)
        copiedDict[lastChar] = copiedList
        DoShiritoriToEnd(copiedDict, nextWord, copiedPath, depth)
    }
}
func main() {
    inFilePath := flag.String("in","","input data file")
    flag.Parse()

    if *inFilePath == "" {
        logger.Print("Input file not spepcified")
        return
    }

    // open input and output files
    logger.Printf("Input file: %s\n", *inFilePath)

    inFile,err := os.Open(*inFilePath, os.O_RDONLY, 0)
    if err != nil {
        logger.Print(err)
        return
    }

    defer func() {
        inFile.Close()
    }()

    // get the first word
    reader := bufio.NewReader(inFile)
    line, err := reader.ReadString('\n')
    if err != nil {
        logger.Print(err)
        return
    }

    firstWord := ExtractFirstAndLastLetter(line[0:len(line)-1])
    startDict := make(WordDictionary, len(CHAR_LIST))
    for {
        line, err = reader.ReadString('\n')
        if err != nil {
            //logger.Print("Error while reading file")
            break
        }

        if len(line) == 1 {
            continue
        }

        word := line[0:len(line)-1]
        word = ExtractFirstAndLastLetter(word)
        //logger.Print(word)
        firstLetter := word[0]
        list := startDict[firstLetter]
        list.add(word)
        startDict[firstLetter] = list
    }

    //logger.Print(startDict)
    var list StringList
    DoShiritoriToEnd(startDict, firstWord, list, 0)
}

入力（level1.txt）

xrdjgorj
jctbqs
jkrshfcilv
jorsezhajk
jmuholsxrc
sfdpdioerpv
silpaavk
vtvauxgju
vogmqa
vnofdnnc
vdruy
uemxza
udbjf
uvtpf
aszhnuvn
awmhkgyd
alhjlmioar
ngzmjyd
nznxgp
dvvcghudkww
dwivxpujzdo
deqwbye
wotaaadgpo
wgaiqosarg
wyeflkkwxg
wzltrkb
oqyjlgapkih
oeqgb
hwfnhx
kcofl
lubhgkf
fywggvxnixm
mxmlqukzcp
pfliwzwot
pzgilzqvwr
teytvanbrsw
tonhvlvg
gnsxomhwdz
zsxyei
ihcsq
ckxuhuty
ydlwmlwskf
rkaqnmnb
btjryytmvye
ezdfvkx
xmzxpvq

実行結果

$ ./shiritori -in=level1.txt
Input file: level1.txt
 WIN:js-sv-vu-ua-an-nd-dw-wo-oh-hx-xq
LOSE:js-sv-vu-ua-an-nd-dw-wo-ob-be-ex-xq
 WIN:js-sv-vu-ua-an-nd-dw-wg-gz-zi-iq
 WIN:js-sv-vu-ua-an-nd-dw-wg-gz-zi-iq
 WIN:js-sv-vu-ua-an-nd-dw-wb-be-ex-xq
LOSE:js-sv-vu-ua-an-nd-do-oh-hx-xq
（後略）

2010年11月12日金曜日

Goの仕様変更

Go一周年ということで久しぶりに最新版にしたところ、コンパイルエラーが発生したのでその原因を探してみた。結果、いくつか仕様に変更があったことが判明。自分に影響があったのは以下の2つ。

2010-10-13のリリースでインターフェースへのポインタが自動的に参照されなくなった
2010-10-20のリリースでlogパッケージのインターフェースが変更になった

インターフェースへのポインタの扱いが変更

リリースノートの該当箇所は以下。

The language change is that uses of pointers to interface values no longer
automatically dereference the pointer.  A pointer to an interface value is more
often a beginner’s bug than correct code.

ようするにインタフェースのポインタとか使うなよ！という変更点。自分のコードでは次のように変える必要があった。ちなみに変更前のコードではconn.Closeでコンパイルエラー（undefinedエラー）が起きてた。

変更前

func communicate(conn *net.Conn) {
    defer func() {
        conn.Close()
    }()
（以下略）

変更後

func communicate(conn net.Conn) {
    defer func() {
        conn.Close()
    }()
（以下略）

logパッケージのインタフェースが変更

個人的に結構影響のあった変更。関数名やらが変わったので色々置換する必要があった。

変更前

var logger *log.Logger = log.New(os.Stdout, nil , "", log.Lok)
logger.Log("Error")

変更後

var logger *log.Logger = log.New(os.Stdout, "", 0)
logger.Print("Error")

2010年9月25日土曜日

channelを閉じる

channelのことを改めて調べたら、closeとclosedなんていう性質があることに気付いた。

channelはcloseすると以降のsendが無視されるようになる
そのあとそのchannelから一回値をreceiveすると、以降closedによる判定がtrueになる
最後のreceiveで返ってくる値は0

この性質を利用して終了判定すれば、以前の記事で書いたようなdeferでブロック云々という問題がそもそも起こらない。しかも、channelへの入力が複数のgoroutineから行われてるときでも多分ブロックせずに正常終了できる。最初からこうすれば良かったかも。

サンプル


package main

import (
    "fmt"
)

type Message int

const (
    MSG_CLOSE Message = 0
)

type Output struct {
    data int
    response Response
}

type Response int

const (
    RESPONSE_OK Response = 0
)

type DataWithResponse struct{
    data int
    responseChannel chan Output
}

func Processor(msgChannel chan Message, inputChannel chan DataWithResponse) {
    defer func(){
        close(msgChannel)
        close(inputChannel)
    }()

    processing := true
    for processing {
        select {
        case msg := <- msgChannel:
            if msg == MSG_CLOSE {
                processing = false
            }
        case input := <- inputChannel:
            fmt.Println("Processing:",input.data)
            // process input here...
            outputData := input.data * 2
            input.responseChannel <- Output {
                outputData,
                RESPONSE_OK,
            }
        }
    }
}

func main() {
    msgChannel := make(chan Message)
    inputChannel := make(chan DataWithResponse)
    
    go Processor(msgChannel, inputChannel)

    // send input
    outputChannel := make(chan Output)
    input := DataWithResponse {
        123,
        outputChannel,
    }
    inputChannel <- input

    // receive output
    output := <- outputChannel
    fmt.Println("Processed:",output.data)

    if closed(msgChannel) {
        fmt.Println("Message channel closed 1")
    }

    msgChannel <- MSG_CLOSE

    // see what happens if we try to input more
    // while the channel is about to get closed
    inputChannel <- input
    msgChannel <- MSG_CLOSE
    inputChannel <- input
    msgChannel <- MSG_CLOSE
    inputChannel <- input
    msgChannel <- MSG_CLOSE

    if closed(msgChannel) {
        fmt.Println("Message channel closed 2")
    }
    msgClosed := <- msgChannel

    if msgClosed == MSG_CLOSE {
        fmt.Println("Closed:", msgClosed)
    }

    if closed(msgChannel) {
        fmt.Println("Message channel closed 3")
    }
}

出力

$ ./channel-test
Processing: 123
Processed: 246
Closed: 0
Message channel closed 3

出力を見ればわかるように、きっちり仕様通りになっている。closeが発行されたであろうタイミングでchannelに入力を入れてもブロックしてないし、closeされたchannelから値を読み出してはじめてclosedになってる。closeしたあとのchannelは値の0を返すようなので、CLOSE状態を判定する定数を0にしておけば可読性も良い。

ということでchannelのcloseは積極的に使いましょう。

おまけ

さっき気付いたけど、closeしたchannelにsendしすぎるとpanicが発生する。

変更点


    for {
        // see what happens if we try to input more
        // while the channel is about to get closed
        inputChannel <- input
        msgChannel <- MSG_CLOSE
        inputChannel <- input
        msgChannel <- MSG_CLOSE
        inputChannel <- input
        msgChannel <- MSG_CLOSE
    }

出力

$ ./channel-test
Processing: 123
Processed: 246
throw: too many operations on a closed channel

panic PC=0xb7615f14
（以下略）

2010年9月23日木曜日

GoはObject Orientedなこともできる言語だけれど、JavaとかC++にはあるアクセス制限のための仕組みが無い。あのpublicとかprivateとかいうヤツね。正確に言えば、パッケージ間であればアクセス制限のための仕組みはある（大文字始まりの変数・関数のみが外部に公開される）が、パッケージ内だとそういう仕組みは無い。つまりGoでstructを作るとパッケージ内では全てがpublicに公開されてしまう。

Go的OO

そんなGoでObject Orientedなコードを書く場合どうするかを試してみた。


package main

import (
    "log"
    "os"
)

var logger *log.Logger = log.New(os.Stdout, nil , "", log.Lok)

type Talker interface {
    Talk() string
}

type Cat struct {
    name string
}

func (c Cat) Talk() string {
    return c.name+": Meow meow"
}

type Dog struct {
    name string
}

func (d Dog) Talk() string {
    return d.name+": Bow wow"
}

func DoTalk(t Talker) {
    logger.Log(t.Talk())
}

func main() {
    cat := Cat{
        name:"Tama",
    }

    dog := Dog{
        name:"Pochi",
    }

    DoTalk(cat)
    DoTalk(dog)
}

上記プログラムの出力

$ ./oo-test
Tama: Meow meow
Pochi: Bow wow

nameというstring変数を持ったDogとCatというクラスと、それらのクラスのインターフェースを抽出したTalkerインターフェース。OOの教科書では良く見る例のGo版である。

ここでもしnameを名字と名前の二つの要素から構成しようと変更しようとした場合、色々と書き換えが必要になる。なので、（他のOO言語でも一緒だけど）Go的にはnameを生のstringとするのでなく、専用の型を作っておく方が安全。


type Name string

type Cat struct {
    name Name
}

func (n Name) String() string {
    return string(n)
}

func (c Cat) Talk() string {
    return c.name.String()+": Meow meow"
}

こうしておくと、先のように名字と名前にわけたくなったときでもコードの変更量が少ない。


type Name struct {
    first string
    last string
}

type Talker interface {
    Talk() string
}

type Cat struct {
    name Name
}

func (n Name) String() string {
    return n.last + n.first
}

func (c Cat) Talk() string {
    return c.name.String()+": Meow meow"
}

type Dog struct {
    name Name
}

func (d Dog) Talk() string {
    return d.name.String()+": Bow wow"
}

func DoTalk(t Talker) {
    logger.Log(t.Talk())
}

func main() {
    cat := Cat{
        name: Name{
            first:"たま",
            last:"斎藤",
        },
    }

    dog := Dog{
        name: Name{
            first:"ポチ",
            last:"五十嵐",
        },
    }

    DoTalk(cat)
    DoTalk(dog)
}

出力

$ ./oo-test
斎藤たま: Meow meow
五十嵐ポチ: Bow wow

更に言うと、上記のようにString()という関数でNameの文字列を返すようにしておくと、fmt.PrintfでもそもままNameが渡せるようになるので二度おいしい。これが実現可能なのは、fmt側で専用のStringerインタフェースを定義しているから。


    dog := Dog{
        name: Name{
            first:"ポチ",
            last:"五十嵐",
        },
    }

    fmt.Printf("name: %s\n",dog.name)

出力

name: 五十嵐ポチ

まとめ

そんなわけでObject OrientedなGoを書いてみた。基本はアクセス制限の効かない普通のOO言語と言ったところ。一つ違うのがGoのもつインタフェースの仕組みで、標準ライブラリが受けいれるインタフェースに沿えば自作したクラスであっても標準ライブラリにそのまま渡せるというのは便利。

型はパッケージ内・パッケージ間に関わらず積極的に定義する。型チェックの厳しさがGoの強みなので、それを活かすのが重要。ついでに変更にも強くなる。
標準ライブラリがサポートしているインタフェースにあわせて関数を定義すると色々便利。

2010年9月14日火曜日

deferを使うときの注意

2010/09/25 追記

こっちのやり方のほうがいいかも

Goでコードを書いてるとdeferを積極的に使いたくなるんだけど、それで調子に乗ってdeferしまくってたら罠にはまった。

具体的には、あるバージョンからstring.Splitの仕様が変わって、それに気付かず古いコードのままでコンパイルして実行したらある地点から処理が進まなくなった。でしばらく調査してわかったのが、処理が進まなくなったのはpanicが発生してdeferが意図したタイミングでないときに走り、結果として処理がブロックしていたからということ。

deferのなかでブロックする

検証用にサンプルソースを用意した。重要なのは func process()のなか。panicを起こさなければ正常終了するけれど、panicを起こさせると全スレッドがデッドロックを起こす。


package main

import (
    "log"
    "os"
)

var logger *log.Logger = log.New(os.Stdout, nil , "", log.Lok)

type Message int
type Input int
type Output int

type InputWithResponse struct {
    data Input
    responseChannel chan Output
}

const (
    MSG_FINISHED Message = 0
    MSG_ABORT Message = 1
)

const (
    OUT_DATA Output = 0
)

const (
    IN_DATA Input = 0
)


func process(input InputWithResponse) {
    logger.Log("Processing input:", input.data)

    // we simulate a panic here.
    panic("process()")

    // send back the processed result
    input.responseChannel <- OUT_DATA
}

func Processor(msgChannel chan Message, inputChannel chan InputWithResponse) {
    // send a message when we're done
    defer func() {
        msgChannel <- MSG_FINISHED
    }()

    run := true
    for run {
        select {
        case msg := <- msgChannel:
            switch msg {
            case MSG_ABORT:
                logger.Log("ABORT message received.")
                run = false
            }
        case input := <- inputChannel:
            logger.Log("Received input:", input)
            process(input)
        }
    }
    logger.Log("Finished processing.")
}

func main() {
    msgChannel := make(chan Message) // create a message channel
    inputChannel := make(chan InputWithResponse) // create a I/O channel
    logger.Log("Start processing.")
    go Processor(msgChannel, inputChannel)

    outputChannel := make(chan Output)
    input := InputWithResponse {
        IN_DATA,
        outputChannel,
    }

    inputChannel <- input
    output := <- outputChannel // get processed data
    logger.Log("Processed output:", output)

    msgChannel <- MSG_ABORT // stop the processor

    _ = <- msgChannel // wait for the processor to end

    logger.Log("Exiting.")
}

process()のなかでpanic()を呼ばなかった場合の出力

Start processing.
Received input: {0 0xb77ad780}
Processing input: 0
Processed output: 0
ABORT message received.
Finished processing.
Exiting.

process()のなかでpanicした場合の出力

Start processing.
Received input: {0 0xb761b780}
Processing input: 0
throw: all goroutines are asleep - deadlock!

panic PC=0xb761e080
throw+0x49 /home/masato/lib/google-go/src/pkg/runtime/runtime.c:73
        throw(0xffffffff, 0x80993af)
nextgandunlock+0x19a /home/masato/lib/google-go/src/pkg/runtime/proc.c:329
        nextgandunlock()
scheduler+0x158 /home/masato/lib/google-go/src/pkg/runtime/proc.c:521
        scheduler()
mstart+0x75 /home/masato/lib/google-go/src/pkg/runtime/proc.c:389
        mstart()
clone+0x90 /home/masato/lib/google-go/src/pkg/runtime/linux/386/sys.s:198
        clone()
（以下略）

デッドロックが起きる理由というのが、deferした関数がpanicの直後に処理されるから。流れとしては下のような感じ。

panicを起こすと、その時点でprocess()の実行はreturnする
するとprocess()のなかで起きるはずだったresponseChannelへの書き込みが発生しない
結果main()がresponseChannelの返答を待っているところでブロックする
また、panicが上位に伝播する過程でProcessor()のdeferが実行され、そこでもmsgChannelへの書きこみがブロックする
msgChannelから読むはずのmain()はresponseChannelの返答をずっと待っている
デッドロック完成！

デッドロックさせないためには

試したところ以下の方法が有効だった。

deferの先頭周辺（ブロックしてしまう処理の前）にrecover()を挿入する
msgChannelをbuffered channelに置きかえる
process()をgoroutineとして実行する

recover()を挿入する

recover()を挿入するのがおそらく最も正しいやり方。こうすることで意図しないエラーが発生した場合どうするかを明示的に書くことができる。具体的には、deferする関数を下のように書きかえれば、デッドロックでは無く通常のpanicとして異常終了させられる。


defer func() {
    if x := recover(); x != nil {
        panic(x) // go back to panicking
    }
    msgChannel <- MSG_FINISHED
}()

ドキュメントで言うとこのへんに詳細が記述されている。

buffered channelに置きかえる

msgChannelをmakeするところで、"make(chan Message, 5)"のようにバッファつきのチャンネルを作ればdeferのなかでブロックすることも無くなるので、recover()しなくても上位にpanicが伝播する。

process()をgoroutineとして実行する

どうもgoroutineとして実行された関数は、panicを親ルーチンへ伝播させるわけではなく、いきなりもっと上位まで伝播させるようになるらしい。つまり、"process(input)"と記述されているところを"go process(input)"と変えるだけでprocess()内のpanicが最上位まで伝播するようになる、ということ。

processをgoroutineとして実行した場合の出力

Start processing.
Received input: {0 0xb77ad780}
Processing input: 0
panic: process()

panic PC=0xb7791150
runtime.panic+0xa9 /home/masato/lib/google-go/src/pkg/runtime/proc.c:1015
        runtime.panic(0x0, 0xb7791180)
main.process+0xd8 /home/masato/desk/programs/go/lock-test.go:40
        main.process(0x809d268, 0xb778c870)
goexit /home/masato/lib/google-go/src/pkg/runtime/proc.c:145
        goexit()
（以下略）

まとめ

ということで、Goでdeferを使うときは下位の関数が起こすかもしれないpanicに注意しましょう。なお今回のサンプルをビルドするのにつかったGoのリビジョンはr6255:1dc78bb51937でした。

deferした関数の中では極力ブロックしない
今回の例のようにdeferのなかでchannelへの書き込みをする場合、make(chan ****, 5)とかでbuffered channelを作って、ブロックしないようにする
どうしてもdeferのなかでブロックする場合は、ブロックする箇所より前にrecover()を入れて下位で発生したpanicに備える
もしくは、panicが起きそうな処理は別のgoroutineとして実行し、上位までpanicが伝播するようにしておく

登録: 投稿 (Atom)